postgresql/src/backend/utils/adt/tsginidx.c

/*-------------------------------------------------------------------------
 *
 * tsginidx.c
 *	 GIN support functions for tsvector_ops
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/tsginidx.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/gin.h"
#include "access/stratnum.h"
#include "miscadmin.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"


Datum
gin_cmp_tslexeme(PG_FUNCTION_ARGS)
{
	text	   *a = PG_GETARG_TEXT_PP(0);
	text	   *b = PG_GETARG_TEXT_PP(1);
	int			cmp;

	cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
						  VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
						  false);

	PG_FREE_IF_COPY(a, 0);
	PG_FREE_IF_COPY(b, 1);
	PG_RETURN_INT32(cmp);
}

Datum
gin_cmp_prefix(PG_FUNCTION_ARGS)
{
	text	   *a = PG_GETARG_TEXT_PP(0);
	text	   *b = PG_GETARG_TEXT_PP(1);

#ifdef NOT_USED
	StrategyNumber strategy = PG_GETARG_UINT16(2);
	Pointer		extra_data = PG_GETARG_POINTER(3);
#endif
	int			cmp;

	cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
						  VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
						  true);

	if (cmp < 0)
		cmp = 1;				/* prevent continue scan */

	PG_FREE_IF_COPY(a, 0);
	PG_FREE_IF_COPY(b, 1);
	PG_RETURN_INT32(cmp);
}

Datum
gin_extract_tsvector(PG_FUNCTION_ARGS)
{
	TSVector	vector = PG_GETARG_TSVECTOR(0);
	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
	Datum	   *entries = NULL;

	*nentries = vector->size;
	if (vector->size > 0)
	{
		int			i;
		WordEntry  *we = ARRPTR(vector);

		entries = (Datum *) palloc(sizeof(Datum) * vector->size);

		for (i = 0; i < vector->size; i++)
		{
			text	   *txt;

			txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
			entries[i] = PointerGetDatum(txt);

			we++;
		}
	}

	PG_FREE_IF_COPY(vector, 0);
	PG_RETURN_POINTER(entries);
}

Datum
gin_extract_tsquery(PG_FUNCTION_ARGS)
{
	TSQuery		query = PG_GETARG_TSQUERY(0);
	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);

	/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
	bool	  **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);

	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
	Datum	   *entries = NULL;

	*nentries = 0;

	if (query->size > 0)
	{
		QueryItem  *item = GETQUERY(query);
		int32		i,
					j;
		bool	   *partialmatch;
		int		   *map_item_operand;

		/*
		 * If the query doesn't have any required positive matches (for
		 * instance, it's something like '! foo'), we have to do a full index
		 * scan.
		 */
		if (tsquery_requires_match(item))
			*searchMode = GIN_SEARCH_MODE_DEFAULT;
		else
			*searchMode = GIN_SEARCH_MODE_ALL;

		/* count number of VAL items */
		j = 0;
		for (i = 0; i < query->size; i++)
		{
			if (item[i].type == QI_VAL)
				j++;
		}
		*nentries = j;

		entries = (Datum *) palloc(sizeof(Datum) * j);
		partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);

		/*
		 * Make map to convert item's number to corresponding operand's (the
		 * same, entry's) number. Entry's number is used in check array in
		 * consistent method. We use the same map for each entry.
		 */
		*extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
		map_item_operand = (int *) palloc0(sizeof(int) * query->size);

		/* Now rescan the VAL items and fill in the arrays */
		j = 0;
		for (i = 0; i < query->size; i++)
		{
			if (item[i].type == QI_VAL)
			{
				QueryOperand *val = &item[i].qoperand;
				text	   *txt;

				txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
											   val->length);
				entries[j] = PointerGetDatum(txt);
				partialmatch[j] = val->prefix;
				(*extra_data)[j] = (Pointer) map_item_operand;
				map_item_operand[i] = j;
				j++;
			}
		}
	}

	PG_FREE_IF_COPY(query, 0);

	PG_RETURN_POINTER(entries);
}

typedef struct
{
	QueryItem  *first_item;
	GinTernaryValue *check;
	int		   *map_item_operand;
	bool	   *need_recheck;
} GinChkVal;

static GinTernaryValue
checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
{
	int			j;

	/*
	 * if any val requiring a weight is used or caller needs position
	 * information then set recheck flag
	 */
	if (val->weight != 0 || data != NULL)
		*(gcv->need_recheck) = true;

	/* convert item's number to corresponding entry's (operand's) number */
	j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];

	/* return presence of current entry in indexed value */
	return gcv->check[j];
}

/*
 * Wrapper of check condition function for TS_execute.
 */
static bool
checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
	return checkcondition_gin_internal((GinChkVal *) checkval,
									   val,
									   data) != GIN_FALSE;
}

/*
 * Evaluate tsquery boolean expression using ternary logic.
 *
 * Note: the reason we can't use TS_execute() for this is that its API
 * for the checkcondition callback doesn't allow a MAYBE result to be
 * returned, but we might have MAYBEs in the gcv->check array.
 * Perhaps we should change that API.
 */
static GinTernaryValue
TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
{
	GinTernaryValue val1,
				val2,
				result;

	/* since this function recurses, it could be driven to stack overflow */
	check_stack_depth();

	if (curitem->type == QI_VAL)
		return
			checkcondition_gin_internal(gcv,
										(QueryOperand *) curitem,
										NULL /* don't have position info */ );

	switch (curitem->qoperator.oper)
	{
		case OP_NOT:

			/*
			 * Below a phrase search, force NOT's result to MAYBE.  We cannot
			 * invert a TRUE result from the subexpression to FALSE, since
			 * TRUE only says that the subexpression matches somewhere, not
			 * that it matches everywhere, so there might be positions where
			 * the NOT will match.  We could invert FALSE to TRUE, but there's
			 * little point in distinguishing TRUE from MAYBE, since a recheck
			 * will have been forced already.
			 */
			if (in_phrase)
				return GIN_MAYBE;

			result = TS_execute_ternary(gcv, curitem + 1, in_phrase);
			if (result == GIN_MAYBE)
				return result;
			return !result;

		case OP_PHRASE:

			/*
			 * GIN doesn't contain any information about positions, so treat
			 * OP_PHRASE as OP_AND with recheck requirement, and always
			 * reporting MAYBE not TRUE.
			 */
			*(gcv->need_recheck) = true;
			/* Pass down in_phrase == true in case there's a NOT below */
			in_phrase = true;

			/* FALL THRU */

		case OP_AND:
			val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
									  in_phrase);
			if (val1 == GIN_FALSE)
				return GIN_FALSE;
			val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
			if (val2 == GIN_FALSE)
				return GIN_FALSE;
			if (val1 == GIN_TRUE && val2 == GIN_TRUE &&
				curitem->qoperator.oper != OP_PHRASE)
				return GIN_TRUE;
			else
				return GIN_MAYBE;

		case OP_OR:
			val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
									  in_phrase);
			if (val1 == GIN_TRUE)
				return GIN_TRUE;
			val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
			if (val2 == GIN_TRUE)
				return GIN_TRUE;
			if (val1 == GIN_FALSE && val2 == GIN_FALSE)
				return GIN_FALSE;
			else
				return GIN_MAYBE;

		default:
			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
	}

	/* not reachable, but keep compiler quiet */
	return false;
}

Datum
gin_tsquery_consistent(PG_FUNCTION_ARGS)
{
	bool	   *check = (bool *) PG_GETARG_POINTER(0);

	/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
	TSQuery		query = PG_GETARG_TSQUERY(2);

	/* int32	nkeys = PG_GETARG_INT32(3); */
	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
	bool		res = false;

	/* Initially assume query doesn't require recheck */
	*recheck = false;

	if (query->size > 0)
	{
		GinChkVal	gcv;

		/*
		 * check-parameter array has one entry for each value (operand) in the
		 * query.
		 */
		gcv.first_item = GETQUERY(query);
		StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool),
						 "sizes of GinTernaryValue and bool are not equal");
		gcv.check = (GinTernaryValue *) check;
		gcv.map_item_operand = (int *) (extra_data[0]);
		gcv.need_recheck = recheck;

		res = TS_execute(GETQUERY(query),
						 &gcv,
						 TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS,
						 checkcondition_gin);
	}

	PG_RETURN_BOOL(res);
}

Datum
gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
{
	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);

	/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
	TSQuery		query = PG_GETARG_TSQUERY(2);

	/* int32	nkeys = PG_GETARG_INT32(3); */
	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
	GinTernaryValue res = GIN_FALSE;
	bool		recheck;

	/* Initially assume query doesn't require recheck */
	recheck = false;

	if (query->size > 0)
	{
		GinChkVal	gcv;

		/*
		 * check-parameter array has one entry for each value (operand) in the
		 * query.
		 */
		gcv.first_item = GETQUERY(query);
		gcv.check = check;
		gcv.map_item_operand = (int *) (extra_data[0]);
		gcv.need_recheck = &recheck;

		res = TS_execute_ternary(&gcv, GETQUERY(query), false);

		if (res == GIN_TRUE && recheck)
			res = GIN_MAYBE;
	}

	PG_RETURN_GIN_TERNARY_VALUE(res);
}

/*
 * Formerly, gin_extract_tsvector had only two arguments.  Now it has three,
 * but we still need a pg_proc entry with two args to support reloading
 * pre-9.1 contrib/tsearch2 opclass declarations.  This compatibility
 * function should go away eventually.  (Note: you might say "hey, but the
 * code above is only *using* two args, so let's just declare it that way".
 * If you try that you'll find the opr_sanity regression test complains.)
 */
Datum
gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
{
	if (PG_NARGS() < 3)			/* should not happen */
		elog(ERROR, "gin_extract_tsvector requires three arguments");
	return gin_extract_tsvector(fcinfo);
}

/*
 * Likewise, we need a stub version of gin_extract_tsquery declared with
 * only five arguments.
 */
Datum
gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
{
	if (PG_NARGS() < 7)			/* should not happen */
		elog(ERROR, "gin_extract_tsquery requires seven arguments");
	return gin_extract_tsquery(fcinfo);
}

/*
 * Likewise, we need a stub version of gin_tsquery_consistent declared with
 * only six arguments.
 */
Datum
gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
{
	if (PG_NARGS() < 8)			/* should not happen */
		elog(ERROR, "gin_tsquery_consistent requires eight arguments");
	return gin_tsquery_consistent(fcinfo);
}

/*
 * Likewise, a stub version of gin_extract_tsquery declared with argument
 * types that are no longer considered appropriate.
 */
Datum
gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
{
	return gin_extract_tsquery(fcinfo);
}

/*
 * Likewise, a stub version of gin_tsquery_consistent declared with argument
 * types that are no longer considered appropriate.
 */
Datum
gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
{
	return gin_tsquery_consistent(fcinfo);
}