postgresql/src/include/tsearch/ts_public.h

160 lines
4.7 KiB
C

/*-------------------------------------------------------------------------
*
* ts_public.h
* Public interface to various tsearch modules, such as
* parsers and dictionaries.
*
* Copyright (c) 1998-2023, PostgreSQL Global Development Group
*
* src/include/tsearch/ts_public.h
*
*-------------------------------------------------------------------------
*/
#ifndef _PG_TS_PUBLIC_H_
#define _PG_TS_PUBLIC_H_
#include "tsearch/ts_type.h"
/*
* Parser's framework
*/
/*
 * LexDescr
 *
 * Element type of the result returned by a parser's prslextype method.
 * NOTE(review): presumably each element describes one token type the
 * parser can emit -- confirm against the parser implementations.
 */
typedef struct
{
    int         lexid;          /* numeric identifier */
    char       *alias;          /* alias string */
    char       *descr;          /* description string */
} LexDescr;
/*
* Interface to headline generator (tsparser's prsheadline function)
*
* HeadlineParsedText describes the text that is to be highlighted.
* Some fields are passed from the core code to the prsheadline function,
* while others are output from the prsheadline function.
*
* The principal data is words[], an array of HeadlineWordEntry,
* one entry per token, of length curwords.
* The fields of HeadlineWordEntry are:
*
* in, selected, replace, skip: these flags are initially zero
* and may be set by the prsheadline function. A consecutive group
* of tokens marked "in" forms a "fragment" to be output.
* Such tokens may additionally be marked selected, replace, or skip
* to modify how they are shown. (If you set more than one of those
* bits, you get an unspecified one of those behaviors.)
*
* type, len, pos, word: filled by core code to describe the token.
*
* item: if the token matches any operand of the tsquery of interest,
* a pointer to such an operand. (If there are multiple matching
* operands, we generate extra copies of the HeadlineWordEntry to hold
* all the pointers. The extras are marked with repeated = 1 and should
* be ignored except for checking the item pointer.)
*/
/*
 * HeadlineWordEntry
 *
 * One token of the text being headlined.  The leading members are
 * bit-fields whose widths add up to 32 bits (1+1+1+1+1+3+8+16); their
 * order is part of the struct layout, so do not rearrange them.
 */
typedef struct
{
    /* flag bits */
    uint32      selected:1;     /* token is to be highlighted */
    uint32      in:1;           /* token is part of headline */
    uint32      replace:1;      /* token is to be replaced with a space */
    uint32      repeated:1;     /* duplicate entry to hold item pointer */
    uint32      skip:1;         /* token is to be skipped (not output) */
    uint32      unused:3;       /* available bits */

    /* token description */
    uint32      type:8;         /* parser's token category */
    uint32      len:16;         /* length of token */
    WordEntryPos pos;           /* position of token */
    char       *word;           /* text of token (not null-terminated) */

    /* link to the query */
    QueryOperand *item;         /* a matching query operand, or NULL if none */
} HeadlineWordEntry;
/*
 * HeadlineParsedText
 *
 * State exchanged between the core headline code and a parser's
 * prsheadline function: the token array, plus the markup strings used
 * for selected tokens and for separating fragments.
 */
typedef struct
{
    /* ---- set up by core code before the prsheadline function runs ---- */
    HeadlineWordEntry *words;   /* token array, one entry per token */
    int32       lenwords;       /* allocated length of words[] */
    int32       curwords;       /* current number of valid entries */
    int32       vectorpos;      /* used by ts_parse.c in filling pos fields */

    /* ---- must be filled in by the prsheadline function ---- */

    /* markup for selected tokens and the separator between fragments */
    char       *startsel;       /* palloc'd strings */
    char       *stopsel;
    char       *fragdelim;

    /* lengths of the three strings above */
    int16       startsellen;
    int16       stopsellen;
    int16       fragdelimlen;
} HeadlineParsedText;
/*
* Common useful things for tsearch subsystem
*/
/*
 * get_tsearch_config_filename: declaration only; the definition is not in
 * this header.  Takes a base name and an extension, both read-only C
 * strings, and returns a char pointer.
 *
 * NOTE(review): presumably combines the two parts into the file name of a
 * tsearch configuration file; how the result is allocated and who owns it
 * is not visible here -- confirm against the definition.
 */
extern char *get_tsearch_config_filename(const char *basename,
const char *extension);
/*
* Often useful stopword list management
*/
/*
 * StopList
 *
 * Container for a stopword list; handed to readstoplist() and
 * searchstoplist().
 */
typedef struct
{
    int         len;            /* NOTE(review): presumably the number of
                                 * entries in stop[] -- confirm */
    char      **stop;           /* array of string pointers */
} StopList;
/*
 * readstoplist: declaration only (defined elsewhere).  Takes a file name,
 * a StopList to operate on, and a callback that maps a read-only C string
 * to a char pointer.
 *
 * NOTE(review): presumably loads the stopwords found in fname into *s,
 * passing each word through wordop; whether wordop may be NULL is not
 * visible here -- confirm against the definition.
 */
extern void readstoplist(const char *fname, StopList *s,
char *(*wordop) (const char *));
/*
 * searchstoplist: declaration only (defined elsewhere).  Returns a bool
 * for the given list and key.
 *
 * NOTE(review): presumably true when key is present in s -- confirm.
 */
extern bool searchstoplist(StopList *s, char *key);
/*
* Interface with dictionaries
*/
/*
 * TSLexeme
 *
 * Result element returned by any dictionary lexize function.
 */
typedef struct
{
    /*----------
     * nvariant identifies which split variant of the input word this
     * lexeme belongs to.  For instance, the Norwegian word
     * 'fotballklubber' can be split two ways, ( fotball, klubb ) and
     * ( fot, ball, klubb ), so a dictionary should return:
     *
     *      nvariant    lexeme
     *         1        fotball
     *         1        klubb
     *         2        fot
     *         2        ball
     *         2        klubb
     *
     * A TSLexeme is treated as part of the same split variant as the one
     * before it when both carry the same nvariant value.  The actual
     * numbers are irrelevant; only a change from one lexeme to the next
     * is significant.
     *----------
     */
    uint16      nvariant;

    uint16      flags;          /* See flag bits below */

    char       *lexeme;         /* C string */
} TSLexeme;
/*
 * Flag bits that can appear in TSLexeme.flags.  Each macro is a distinct
 * single-bit mask.
 */
#define TSL_ADDPOS      (1 << 0)
#define TSL_PREFIX      (1 << 1)
#define TSL_FILTER      (1 << 2)
/*
 * DictSubState
 *
 * Support structure for complex dictionaries such as thesaurus.  A pointer
 * to one of these is the 4th argument of the dictlexize method.
 */
typedef struct
{
    /* in: tells lexize_info that the end of the text has been reached */
    bool        isend;

    /* out: dict wants next lexeme */
    bool        getnext;

    /* internal dict state kept between calls with getnext == true */
    void       *private_state;
} DictSubState;
#endif /* _PG_TS_PUBLIC_H_ */