Convert tsqueryin and tsvectorin to report errors softly.

This is slightly tedious because the adjustments cascade through
a couple of levels of subroutines, but it's not very hard.
I chose to avoid changing function signatures more than absolutely
necessary, by passing the escontext pointer in existing structs
where possible.

tsquery's nuisance NOTICEs about empty queries are suppressed in
soft-error mode, since they're not errors and we surely don't want
them to be shown to the user anyway.  Maybe that whole behavior
should be reconsidered.

Discussion: https://postgr.es/m/3824377.1672076822@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2022-12-27 12:00:31 -05:00
parent eb8312a22a
commit 78212f2101
8 changed files with 196 additions and 52 deletions

View File

@ -594,7 +594,8 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
0);
0,
NULL);
PG_RETURN_TSQUERY(query);
}
@ -630,7 +631,8 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
P_TSQ_PLAIN);
P_TSQ_PLAIN,
NULL);
PG_RETURN_POINTER(query);
}
@ -667,7 +669,8 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
P_TSQ_PLAIN);
P_TSQ_PLAIN,
NULL);
PG_RETURN_TSQUERY(query);
}
@ -704,7 +707,8 @@ websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
P_TSQ_WEB);
P_TSQ_WEB,
NULL);
PG_RETURN_TSQUERY(query);
}

View File

@ -16,6 +16,7 @@
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "nodes/miscnodes.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
@ -58,10 +59,16 @@ typedef enum
/*
* get token from query string
*
* *operator is filled in with OP_* when return values is PT_OPR,
* but *weight could contain a distance value in case of phrase operator.
* *strval, *lenval and *weight are filled in when return value is PT_VAL
* All arguments except "state" are output arguments.
*
* If return value is PT_OPR, then *operator is filled with an OP_* code
* and *weight will contain a distance value in case of phrase operator.
*
* If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
* are filled.
*
* If PT_ERR is returned then a soft error has occurred. If state->escontext
* isn't already filled then this should be reported as a generic parse error.
*/
typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
int *lenval, char **strval,
@ -93,6 +100,9 @@ struct TSQueryParserStateData
/* state for value's parser */
TSVectorParseState valstate;
/* context object for soft errors - must match valstate's escontext */
Node *escontext;
};
/*
@ -194,7 +204,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
if (ptr == endptr)
return false;
else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
ereport(ERROR,
ereturn(pstate->escontext, false,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
MAXENTRYPOS)));
@ -301,10 +311,8 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
}
else if (t_iseq(state->buf, ':'))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsquery: \"%s\"",
state->buffer)));
/* generic syntax error message is fine */
return PT_ERR;
}
else if (!t_isspace(state->buf))
{
@ -320,12 +328,17 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
state->state = WAITOPERATOR;
return PT_VAL;
}
else if (SOFT_ERROR_OCCURRED(state->escontext))
{
/* gettoken_tsvector reported a soft error */
return PT_ERR;
}
else if (state->state == WAITFIRSTOPERAND)
{
return PT_END;
}
else
ereport(ERROR,
ereturn(state->escontext, PT_ERR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("no operand in tsquery: \"%s\"",
state->buffer)));
@ -354,6 +367,11 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
*operator = OP_PHRASE;
return PT_OPR;
}
else if (SOFT_ERROR_OCCURRED(state->escontext))
{
/* parse_phrase_operator reported a soft error */
return PT_ERR;
}
else if (t_iseq(state->buf, ')'))
{
state->buf++;
@ -438,6 +456,11 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
state->state = WAITOPERATOR;
return PT_VAL;
}
else if (SOFT_ERROR_OCCURRED(state->escontext))
{
/* gettoken_tsvector reported a soft error */
return PT_ERR;
}
else if (state->state == WAITFIRSTOPERAND)
{
return PT_END;
@ -529,12 +552,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
QueryOperand *tmp;
if (distance >= MAXSTRPOS)
ereport(ERROR,
ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("value is too big in tsquery: \"%s\"",
state->buffer)));
if (lenval >= MAXSTRLEN)
ereport(ERROR,
ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("operand is too long in tsquery: \"%s\"",
state->buffer)));
@ -562,7 +585,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool
pg_crc32 valcrc;
if (lenval >= MAXSTRLEN)
ereport(ERROR,
ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long in tsquery: \"%s\"",
state->buffer)));
@ -686,11 +709,17 @@ makepol(TSQueryParserState state,
return;
case PT_ERR:
default:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsquery: \"%s\"",
state->buffer)));
/* don't overwrite a soft error saved by gettoken function */
if (!SOFT_ERROR_OCCURRED(state->escontext))
errsave(state->escontext,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsquery: \"%s\"",
state->buffer)));
return;
}
/* detect soft error in pushval or recursion */
if (SOFT_ERROR_OCCURRED(state->escontext))
return;
}
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
@ -769,6 +798,8 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup)
/*
* Parse the tsquery stored in "buf".
*
* Each value (operand) in the query is passed to pushval. pushval can
* transform the simple value to an arbitrarily complex expression using
* pushValue and pushOperator. It must push a single value with pushValue,
@ -778,12 +809,19 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup)
*
* opaque is passed on to pushval as is, pushval can use it to store its
* private state.
*
* The pushval function can record soft errors via escontext.
* Callers must check SOFT_ERROR_OCCURRED to detect that.
*
* A bitmask of flags (see ts_utils.h) and an error context object
* can be provided as well. If a soft error occurs, NULL is returned.
*/
TSQuery
parse_tsquery(char *buf,
PushFunction pushval,
Datum opaque,
int flags)
int flags,
Node *escontext)
{
struct TSQueryParserStateData state;
int i;
@ -791,6 +829,7 @@ parse_tsquery(char *buf,
int commonlen;
QueryItem *ptr;
ListCell *cell;
bool noisy;
bool needcleanup;
int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
@ -808,15 +847,19 @@ parse_tsquery(char *buf,
else
state.gettoken = gettoken_query_standard;
/* emit nuisance NOTICEs only if not doing soft errors */
noisy = !(escontext && IsA(escontext, ErrorSaveContext));
/* init state */
state.buffer = buf;
state.buf = buf;
state.count = 0;
state.state = WAITFIRSTOPERAND;
state.polstr = NIL;
state.escontext = escontext;
/* init value parser's state */
state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);
/* init list of operand */
state.sumlen = 0;
@ -829,11 +872,15 @@ parse_tsquery(char *buf,
close_tsvector_parser(state.valstate);
if (SOFT_ERROR_OCCURRED(escontext))
return NULL;
if (state.polstr == NIL)
{
ereport(NOTICE,
(errmsg("text-search query doesn't contain lexemes: \"%s\"",
state.buffer)));
if (noisy)
ereport(NOTICE,
(errmsg("text-search query doesn't contain lexemes: \"%s\"",
state.buffer)));
query = (TSQuery) palloc(HDRSIZETQ);
SET_VARSIZE(query, HDRSIZETQ);
query->size = 0;
@ -841,7 +888,7 @@ parse_tsquery(char *buf,
}
if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
ereport(ERROR,
ereturn(escontext, NULL,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("tsquery is too large")));
commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
@ -889,7 +936,7 @@ parse_tsquery(char *buf,
* If there are QI_VALSTOP nodes, delete them and simplify the tree.
*/
if (needcleanup)
query = cleanup_tsquery_stopwords(query);
query = cleanup_tsquery_stopwords(query, noisy);
return query;
}
@ -908,8 +955,13 @@ Datum
tsqueryin(PG_FUNCTION_ARGS)
{
char *in = PG_GETARG_CSTRING(0);
Node *escontext = fcinfo->context;
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
PG_RETURN_TSQUERY(parse_tsquery(in,
pushval_asis,
PointerGetDatum(NULL),
0,
escontext));
}
/*

View File

@ -383,7 +383,7 @@ calcstrlen(NODE *node)
* Remove QI_VALSTOP (stopword) nodes from TSQuery.
*/
TSQuery
cleanup_tsquery_stopwords(TSQuery in)
cleanup_tsquery_stopwords(TSQuery in, bool noisy)
{
int32 len,
lenstr,
@ -403,8 +403,9 @@ cleanup_tsquery_stopwords(TSQuery in)
root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd);
if (root == NULL)
{
ereport(NOTICE,
(errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
if (noisy)
ereport(NOTICE,
(errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
out = palloc(HDRSIZETQ);
out->size = 0;
SET_VARSIZE(out, HDRSIZETQ);

View File

@ -15,6 +15,7 @@
#include "postgres.h"
#include "libpq/pqformat.h"
#include "nodes/miscnodes.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
@ -178,6 +179,7 @@ Datum
tsvectorin(PG_FUNCTION_ARGS)
{
char *buf = PG_GETARG_CSTRING(0);
Node *escontext = fcinfo->context;
TSVectorParseState state;
WordEntryIN *arr;
int totallen;
@ -201,7 +203,7 @@ tsvectorin(PG_FUNCTION_ARGS)
char *cur;
int buflen = 256; /* allocated size of tmpbuf */
state = init_tsvector_parser(buf, 0);
state = init_tsvector_parser(buf, 0, escontext);
arrlen = 64;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
@ -210,14 +212,14 @@ tsvectorin(PG_FUNCTION_ARGS)
while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
{
if (toklen >= MAXSTRLEN)
ereport(ERROR,
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long (%ld bytes, max %ld bytes)",
(long) toklen,
(long) (MAXSTRLEN - 1))));
if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR,
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
(long) (cur - tmpbuf), (long) MAXSTRPOS)));
@ -261,13 +263,17 @@ tsvectorin(PG_FUNCTION_ARGS)
close_tsvector_parser(state);
/* Did gettoken_tsvector fail? */
if (SOFT_ERROR_OCCURRED(escontext))
PG_RETURN_NULL();
if (len > 0)
len = uniqueentry(arr, len, tmpbuf, &buflen);
else
buflen = 0;
if (buflen > MAXSTRPOS)
ereport(ERROR,
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));
@ -285,6 +291,7 @@ tsvectorin(PG_FUNCTION_ARGS)
stroff += arr[i].entry.len;
if (arr[i].entry.haspos)
{
/* This should be unreachable because of MAXNUMPOS restrictions */
if (arr[i].poslen > 0xFFFF)
elog(ERROR, "positions array too long");

View File

@ -20,9 +20,19 @@
/*
* Private state of tsvector parser. Note that tsquery also uses this code to
* parse its input, hence the boolean flags. The two flags are both true or
* both false in current usage, but we keep them separate for clarity.
* parse its input, hence the boolean flags. The oprisdelim and is_tsquery
* flags are both true or both false in current usage, but we keep them
* separate for clarity.
*
* If oprisdelim is set, the following characters are treated as delimiters
* (in addition to whitespace): ! | & ( )
*
* is_tsquery affects *only* the content of error messages.
*
* is_web can be true to further modify tsquery parsing.
*
* If escontext is an ErrorSaveContext node, then soft errors can be
* captured there rather than being thrown.
*/
struct TSVectorParseStateData
{
@ -34,16 +44,17 @@ struct TSVectorParseStateData
bool oprisdelim; /* treat ! | & ( ) as delimiters? */
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
bool is_web; /* we're in websearch_to_tsquery() */
Node *escontext; /* for soft error reporting */
};
/*
* Initializes parser for the input string. If oprisdelim is set, the
* following characters are treated as delimiters in addition to whitespace:
* ! | & ( )
* Initializes a parser state object for the given input string.
* A bitmask of flags (see ts_utils.h) and an error context object
* can be provided as well.
*/
TSVectorParseState
init_tsvector_parser(char *input, int flags)
init_tsvector_parser(char *input, int flags, Node *escontext)
{
TSVectorParseState state;
@ -56,12 +67,15 @@ init_tsvector_parser(char *input, int flags)
state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
state->is_web = (flags & P_TSV_IS_WEB) != 0;
state->escontext = escontext;
return state;
}
/*
* Reinitializes parser to parse 'input', instead of previous input.
*
* Note that bufstart (the string reported in errors) is not changed.
*/
void
reset_tsvector_parser(TSVectorParseState state, char *input)
@ -122,23 +136,26 @@ do { \
#define WAITPOSDELIM 7
#define WAITCHARCMPLX 8
#define PRSSYNTAXERROR prssyntaxerror(state)
#define PRSSYNTAXERROR return prssyntaxerror(state)
static void
static bool
prssyntaxerror(TSVectorParseState state)
{
ereport(ERROR,
errsave(state->escontext,
(errcode(ERRCODE_SYNTAX_ERROR),
state->is_tsquery ?
errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
/* In soft error situation, return false as convenience for caller */
return false;
}
/*
* Get next token from string being parsed. Returns true if successful,
* false if end of input string is reached. On success, these output
* parameters are filled in:
* false if end of input string is reached or a soft error occurred.
*
* On success, these output parameters are filled in:
*
* *strval pointer to token
* *lenval length of *strval
@ -149,7 +166,11 @@ prssyntaxerror(TSVectorParseState state)
* *poslen number of elements in *pos_ptr
* *endptr scan resumption point
*
* Pass NULL for unwanted output parameters.
* Pass NULL for any unwanted output parameters.
*
* If state->escontext is an ErrorSaveContext, then caller must check
* SOFT_ERROR_OCCURRED() to determine whether a "false" result means
* error or normal end-of-string.
*/
bool
gettoken_tsvector(TSVectorParseState state,
@ -195,7 +216,7 @@ gettoken_tsvector(TSVectorParseState state,
else if (statecode == WAITNEXTCHAR)
{
if (*(state->prsbuf) == '\0')
ereport(ERROR,
ereturn(state->escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character: \"%s\"",
state->bufstart)));
@ -313,7 +334,7 @@ gettoken_tsvector(TSVectorParseState state,
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
/* we cannot get here in tsquery, so no need for 2 errmsgs */
if (WEP_GETPOS(pos[npos - 1]) == 0)
ereport(ERROR,
ereturn(state->escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector: \"%s\"",
state->bufstart)));

View File

@ -25,11 +25,13 @@
struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
typedef struct TSVectorParseStateData *TSVectorParseState;
/* flag bits that can be passed to init_tsvector_parser: */
#define P_TSV_OPR_IS_DELIM (1 << 0)
#define P_TSV_IS_TSQUERY (1 << 1)
#define P_TSV_IS_WEB (1 << 2)
extern TSVectorParseState init_tsvector_parser(char *input, int flags);
extern TSVectorParseState init_tsvector_parser(char *input, int flags,
Node *escontext);
extern void reset_tsvector_parser(TSVectorParseState state, char *input);
extern bool gettoken_tsvector(TSVectorParseState state,
char **strval, int *lenval,
@ -58,13 +60,15 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
* QueryOperand struct */
bool prefix);
/* flag bits that can be passed to parse_tsquery: */
#define P_TSQ_PLAIN (1 << 0)
#define P_TSQ_WEB (1 << 1)
extern TSQuery parse_tsquery(char *buf,
PushFunction pushval,
Datum opaque,
int flags);
int flags,
Node *escontext);
/* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state,
@ -222,7 +226,7 @@ extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
* TSQuery Utilities
*/
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
extern TSQuery cleanup_tsquery_stopwords(TSQuery in);
extern TSQuery cleanup_tsquery_stopwords(TSQuery in, bool noisy);
typedef struct QTNode
{

View File

@ -89,6 +89,25 @@ SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
ERROR: syntax error in tsvector: "'' '1' '2'"
LINE 1: SELECT $$'' '1' '2'$$::tsvector;
^
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('foo', 'tsvector');
pg_input_is_valid
-------------------
t
(1 row)
SELECT pg_input_is_valid($$''$$, 'tsvector');
pg_input_is_valid
-------------------
f
(1 row)
SELECT pg_input_error_message($$''$$, 'tsvector');
pg_input_error_message
--------------------------------
syntax error in tsvector: "''"
(1 row)
--Base tsquery test
SELECT '1'::tsquery;
tsquery
@ -372,6 +391,31 @@ SELECT '!!a & !!b'::tsquery;
!!'a' & !!'b'
(1 row)
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('foo', 'tsquery');
pg_input_is_valid
-------------------
t
(1 row)
SELECT pg_input_is_valid('foo!', 'tsquery');
pg_input_is_valid
-------------------
f
(1 row)
SELECT pg_input_error_message('foo!', 'tsquery');
pg_input_error_message
---------------------------------
syntax error in tsquery: "foo!"
(1 row)
SELECT pg_input_error_message('a <100000> b', 'tsquery');
pg_input_error_message
---------------------------------------------------------------------------------------
distance in phrase operator must be an integer value between zero and 16384 inclusive
(1 row)
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
true

View File

@ -19,6 +19,11 @@ SELECT '''w'':4A,3B,2C,1D,5 a:8';
SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('foo', 'tsvector');
SELECT pg_input_is_valid($$''$$, 'tsvector');
SELECT pg_input_error_message($$''$$, 'tsvector');
--Base tsquery test
SELECT '1'::tsquery;
SELECT '1 '::tsquery;
@ -68,6 +73,12 @@ SELECT 'a & !!b'::tsquery;
SELECT '!!a & b'::tsquery;
SELECT '!!a & !!b'::tsquery;
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('foo', 'tsquery');
SELECT pg_input_is_valid('foo!', 'tsquery');
SELECT pg_input_error_message('foo!', 'tsquery');
SELECT pg_input_error_message('a <100000> b', 'tsquery');
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
SELECT 'a' > 'b & c'::tsquery as "false";