2007-08-21 03:11:32 +02:00
|
|
|
/*-------------------------------------------------------------------------
 *
 * ts_public.h
 *	  Public interface to various tsearch modules, such as
 *	  parsers and dictionaries.
 *
 * Copyright (c) 1998-2022, PostgreSQL Global Development Group
 *
 * src/include/tsearch/ts_public.h
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#ifndef _PG_TS_PUBLIC_H_
|
|
|
|
#define _PG_TS_PUBLIC_H_
|
|
|
|
|
|
|
|
#include "tsearch/ts_type.h"
|
|
|
|
|
|
|
|
/*
 * Parser's framework
 */
|
|
|
|
|
|
|
|
/*
 * LexDescr: returning type for prslextype method of parser.
 *
 * A plain record holding one integer and two C-string pointers.  This
 * header does not state who owns the strings; check the parser that
 * fills the record before freeing them.
 */
typedef struct
{
	int			lexid;			/* presumably the numeric id of the token
								 * type -- confirm against the parser */
	char	   *alias;			/* presumably the short name of the token
								 * type -- confirm against the parser */
	char	   *descr;			/* presumably a human-readable description
								 * -- confirm against the parser */
} LexDescr;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Interface to headline generator
|
|
|
|
*/
|
|
|
|
typedef struct
|
|
|
|
{
|
2016-04-07 17:44:18 +02:00
|
|
|
uint32 selected:1,
|
|
|
|
in:1,
|
|
|
|
replace:1,
|
|
|
|
repeated:1,
|
|
|
|
skip:1,
|
|
|
|
unused:3,
|
|
|
|
type:8,
|
|
|
|
len:16;
|
|
|
|
WordEntryPos pos;
|
|
|
|
char *word;
|
|
|
|
QueryOperand *item;
|
2007-08-21 03:11:32 +02:00
|
|
|
} HeadlineWordEntry;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
HeadlineWordEntry *words;
|
2012-06-25 00:51:46 +02:00
|
|
|
int32 lenwords;
|
|
|
|
int32 curwords;
|
2016-04-07 17:44:18 +02:00
|
|
|
int32 vectorpos; /* positions a-la tsvector */
|
2007-08-21 03:11:32 +02:00
|
|
|
char *startsel;
|
|
|
|
char *stopsel;
|
2008-10-17 20:05:19 +02:00
|
|
|
char *fragdelim;
|
2012-06-25 00:51:46 +02:00
|
|
|
int16 startsellen;
|
|
|
|
int16 stopsellen;
|
|
|
|
int16 fragdelimlen;
|
2007-08-21 03:11:32 +02:00
|
|
|
} HeadlineParsedText;
|
|
|
|
|
|
|
|
/*
 * Common useful things for tsearch subsystem
 */

/*
 * get_tsearch_config_filename
 *
 * Takes a base name and an extension (both read-only C strings) and
 * returns a char pointer.  Presumably this is the path of a tsearch
 * configuration file built from the two parts; see the definition for how
 * the result is allocated and how invalid names are reported.
 */
extern char *get_tsearch_config_filename(const char *basename,
										 const char *extension);
|
|
|
|
|
|
|
|
/*
 * Often useful stopword list management
 */

/*
 * StopList: a counted array of C strings.
 *
 * Nothing in this header says whether the array is kept sorted or who
 * owns the strings; see the functions that fill and search the list.
 */
typedef struct
{
	int			len;			/* presumably the number of entries in
								 * stop[] -- confirm */
	char	  **stop;			/* array of stopword strings */
} StopList;
|
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
/*
 * readstoplist
 *
 * Takes a file name, a StopList to operate on, and a caller-supplied
 * function that maps a read-only C string to a char pointer.  Presumably
 * the file is read into *s and wordop is applied to each word on the way
 * in; see the definition for whether wordop may be NULL and how errors
 * are reported.
 */
extern void readstoplist(const char *fname, StopList *s,
						 char *(*wordop) (const char *));

/*
 * searchstoplist
 *
 * Takes a StopList and a key string and returns a bool; presumably true
 * when key is present in the list -- confirm against the definition.
 */
extern bool searchstoplist(StopList *s, char *key);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
/*
 * Interface with dictionaries
 */
|
|
|
|
|
|
|
|
/* return struct for any lexize function */
|
|
|
|
typedef struct
|
|
|
|
{
|
2011-11-03 23:47:28 +01:00
|
|
|
/*----------
|
|
|
|
* Number of current variant of split word. For example the Norwegian
|
|
|
|
* word 'fotballklubber' has two variants to split: ( fotball, klubb )
|
|
|
|
* and ( fot, ball, klubb ). So, dictionary should return:
|
|
|
|
*
|
|
|
|
* nvariant lexeme
|
|
|
|
* 1 fotball
|
|
|
|
* 1 klubb
|
|
|
|
* 2 fot
|
|
|
|
* 2 ball
|
|
|
|
* 2 klubb
|
|
|
|
*
|
|
|
|
* In general, a TSLexeme will be considered to belong to the same split
|
|
|
|
* variant as the previous one if they have the same nvariant value.
|
|
|
|
* The exact values don't matter, only changes from one lexeme to next.
|
|
|
|
*----------
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
uint16 nvariant;
|
|
|
|
|
2011-11-03 23:47:28 +01:00
|
|
|
uint16 flags; /* See flag bits below */
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2011-11-03 23:47:28 +01:00
|
|
|
char *lexeme; /* C string */
|
2007-08-21 03:11:32 +02:00
|
|
|
} TSLexeme;
|
|
|
|
|
2011-11-03 23:47:28 +01:00
|
|
|
/*
 * Flag bits that can appear in TSLexeme.flags
 *
 * Each value is a distinct single bit, so the flags can be combined with
 * bitwise OR and tested with bitwise AND.
 */
#define TSL_ADDPOS		0x01
#define TSL_PREFIX		0x02
#define TSL_FILTER		0x04
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
/*
 * Struct for supporting complex dictionaries like thesaurus.
 * 4th argument for dictlexize method is a pointer to this.
 *
 * "in" members are set before the dictionary is called; "out" members are
 * set by the dictionary for its caller to read.
 */
typedef struct
{
	bool		isend;			/* in: marks for lexize_info that the end
								 * of the text is reached */
	bool		getnext;		/* out: dict wants next lexeme */
	void	   *private_state;	/* internal dict state between calls with
								 * getnext == true */
} DictSubState;
|
|
|
|
|
|
|
|
#endif /* _PG_TS_PUBLIC_H_ */
|