/*-------------------------------------------------------------------------
 *
 * ts_public.h
 *	  Public interface to various tsearch modules, such as
 *	  parsers and dictionaries.
 *
 * Copyright (c) 1998-2023, PostgreSQL Global Development Group
 *
 * src/include/tsearch/ts_public.h
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#ifndef _PG_TS_PUBLIC_H_
|
|
|
|
#define _PG_TS_PUBLIC_H_
|
|
|
|
|
|
|
|
#include "tsearch/ts_type.h"
|
|
|
|
|
|
|
|
/*
 * Parser's framework
 */
|
|
|
|
|
|
|
|
/*
 * Return type for the prslextype method of a parser.
 *
 * Each LexDescr carries one integer and two character pointers.
 *
 * NOTE(review): judging by the field names, lexid is the numeric token type
 * and alias/descr are its short name and human-readable description --
 * confirm against the parser implementations before relying on this.
 */
typedef struct
{
	int			lexid;
	char	   *alias;
	char	   *descr;
} LexDescr;
|
|
|
|
|
|
|
|
/*
|
2022-11-21 23:07:07 +01:00
|
|
|
* Interface to headline generator (tsparser's prsheadline function)
|
|
|
|
*
|
|
|
|
* HeadlineParsedText describes the text that is to be highlighted.
|
|
|
|
* Some fields are passed from the core code to the prsheadline function,
|
|
|
|
* while others are output from the prsheadline function.
|
|
|
|
*
|
|
|
|
* The principal data is words[], an array of HeadlineWordEntry,
|
|
|
|
* one entry per token, of length curwords.
|
|
|
|
* The fields of HeadlineWordEntry are:
|
|
|
|
*
|
|
|
|
* in, selected, replace, skip: these flags are initially zero
|
|
|
|
* and may be set by the prsheadline function. A consecutive group
|
|
|
|
* of tokens marked "in" form a "fragment" to be output.
|
|
|
|
* Such tokens may additionally be marked selected, replace, or skip
|
|
|
|
* to modify how they are shown. (If you set more than one of those
|
|
|
|
* bits, you get an unspecified one of those behaviors.)
|
|
|
|
*
|
|
|
|
* type, len, pos, word: filled by core code to describe the token.
|
|
|
|
*
|
|
|
|
* item: if the token matches any operand of the tsquery of interest,
|
|
|
|
* a pointer to such an operand. (If there are multiple matching
|
|
|
|
* operands, we generate extra copies of the HeadlineWordEntry to hold
|
|
|
|
* all the pointers. The extras are marked with repeated = 1 and should
|
|
|
|
* be ignored except for checking the item pointer.)
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
typedef struct
|
|
|
|
{
|
2022-11-21 23:07:07 +01:00
|
|
|
uint32 selected:1, /* token is to be highlighted */
|
|
|
|
in:1, /* token is part of headline */
|
|
|
|
replace:1, /* token is to be replaced with a space */
|
|
|
|
repeated:1, /* duplicate entry to hold item pointer */
|
|
|
|
skip:1, /* token is to be skipped (not output) */
|
|
|
|
unused:3, /* available bits */
|
|
|
|
type:8, /* parser's token category */
|
|
|
|
len:16; /* length of token */
|
|
|
|
WordEntryPos pos; /* position of token */
|
|
|
|
char *word; /* text of token (not null-terminated) */
|
|
|
|
QueryOperand *item; /* a matching query operand, or NULL if none */
|
2007-08-21 03:11:32 +02:00
|
|
|
} HeadlineWordEntry;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
2022-11-21 23:07:07 +01:00
|
|
|
/* Fields filled by core code before calling prsheadline function: */
|
2007-08-21 03:11:32 +02:00
|
|
|
HeadlineWordEntry *words;
|
2022-11-21 23:07:07 +01:00
|
|
|
int32 lenwords; /* allocated length of words[] */
|
|
|
|
int32 curwords; /* current number of valid entries */
|
|
|
|
int32 vectorpos; /* used by ts_parse.c in filling pos fields */
|
|
|
|
|
|
|
|
/* The prsheadline function must fill these fields: */
|
|
|
|
/* Strings for marking selected tokens and separating fragments: */
|
|
|
|
char *startsel; /* palloc'd strings */
|
2007-08-21 03:11:32 +02:00
|
|
|
char *stopsel;
|
2008-10-17 20:05:19 +02:00
|
|
|
char *fragdelim;
|
2022-11-21 23:07:07 +01:00
|
|
|
int16 startsellen; /* lengths of strings */
|
2012-06-25 00:51:46 +02:00
|
|
|
int16 stopsellen;
|
|
|
|
int16 fragdelimlen;
|
2007-08-21 03:11:32 +02:00
|
|
|
} HeadlineParsedText;
|
|
|
|
|
|
|
|
/*
 * Common useful things for tsearch subsystem
 */

/*
 * get_tsearch_config_filename
 *
 * Declared here, defined elsewhere.  Takes a base name and an extension
 * (neither is modified) and returns a char pointer.
 *
 * NOTE(review): presumably the result is the path of the tsearch
 * configuration file identified by basename/extension.  How the returned
 * string is allocated and who owns it cannot be seen from this header --
 * confirm against the definition.
 */
extern char *get_tsearch_config_filename(const char *basename,
										 const char *extension);
|
|
|
|
|
|
|
|
/*
 * Often useful stopword list management
 */

/*
 * StopList: an integer count plus an array of character pointers.
 *
 * NOTE(review): presumably "stop" points to "len" stopword strings and the
 * struct is filled by readstoplist() and consulted by searchstoplist() --
 * confirm against their definitions.
 */
typedef struct
{
	int			len;
	char	  **stop;
} StopList;
|
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
/*
 * readstoplist
 *
 * Takes a file name, the StopList to operate on, and a callback "wordop"
 * that receives a const string and returns a char pointer.  Returns nothing.
 *
 * NOTE(review): presumably loads the stopwords found in fname into *s,
 * passing each word through wordop (when given) first -- confirm against
 * the definition, including whether wordop may be NULL.
 */
extern void readstoplist(const char *fname, StopList *s,
						 char *(*wordop) (const char *));

/*
 * searchstoplist
 *
 * Takes a StopList and a key string and returns a bool.
 *
 * NOTE(review): presumably returns true when key is present in s.  Neither
 * parameter is const-qualified in this declaration, so callers cannot pass
 * const data without a cast.
 */
extern bool searchstoplist(StopList *s, char *key);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
/*
 * Interface with dictionaries
 */
|
|
|
|
|
|
|
|
/* return struct for any lexize function */
|
|
|
|
typedef struct
|
|
|
|
{
|
2011-11-03 23:47:28 +01:00
|
|
|
/*----------
|
|
|
|
* Number of current variant of split word. For example the Norwegian
|
|
|
|
* word 'fotballklubber' has two variants to split: ( fotball, klubb )
|
|
|
|
* and ( fot, ball, klubb ). So, dictionary should return:
|
|
|
|
*
|
|
|
|
* nvariant lexeme
|
|
|
|
* 1 fotball
|
|
|
|
* 1 klubb
|
|
|
|
* 2 fot
|
|
|
|
* 2 ball
|
|
|
|
* 2 klubb
|
|
|
|
*
|
|
|
|
* In general, a TSLexeme will be considered to belong to the same split
|
|
|
|
* variant as the previous one if they have the same nvariant value.
|
|
|
|
* The exact values don't matter, only changes from one lexeme to next.
|
|
|
|
*----------
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
uint16 nvariant;
|
|
|
|
|
2011-11-03 23:47:28 +01:00
|
|
|
uint16 flags; /* See flag bits below */
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2011-11-03 23:47:28 +01:00
|
|
|
char *lexeme; /* C string */
|
2007-08-21 03:11:32 +02:00
|
|
|
} TSLexeme;
|
|
|
|
|
2011-11-03 23:47:28 +01:00
|
|
|
/*
 * Flag bits that can appear in TSLexeme.flags
 *
 * Each flag occupies its own bit, so the values may be OR'ed together.
 */
#define TSL_ADDPOS		0x01
#define TSL_PREFIX		0x02
#define TSL_FILTER		0x04
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
/*
 * Struct for supporting complex dictionaries like thesaurus.
 * The 4th argument of the dictlexize method is a pointer to this.
 *
 * isend is an input to the dictionary; getnext is an output from it;
 * private_state is the dictionary's own state, kept between calls for as
 * long as getnext == true.
 */
typedef struct
{
	bool		isend;			/* in: marks for lexize_info that the end of
								 * the text is reached */
	bool		getnext;		/* out: dict wants next lexeme */
	void	   *private_state;	/* internal dict state between calls with
								 * getnext == true */
} DictSubState;
|
|
|
|
|
|
|
|
#endif /* _PG_TS_PUBLIC_H_ */
|