/* postgresql/contrib/tsearch2/tsvector.h */

#ifndef __TXTIDX_H__
#define __TXTIDX_H__
/*
#define TXTIDX_DEBUG
*/
#include "postgres.h"
#include "access/gist.h"
#include "access/itup.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
typedef struct
{
2003-07-21 12:27:44 +02:00
uint32
2003-08-04 02:43:34 +02:00
haspos:1,
len:11, /* MAX 2Kb */
pos:20; /* MAX 1Mb */
2003-07-21 12:27:44 +02:00
} WordEntry;
2003-08-04 02:43:34 +02:00
2003-07-21 12:27:44 +02:00
/*
 * Exclusive upper bounds matching the widths of the WordEntry bit-fields:
 * len is 11 bits wide and pos is 20 bits wide.
 */
#define MAXSTRLEN	(1 << 11)
#define MAXSTRPOS	(1 << 20)
/*
 * One lexeme position (with its weight) packed into 16 bits.
 *
 * Conceptually equivalent to
 *
 *		typedef struct
 *		{
 *			uint16
 *				weight:2,
 *				pos:14;
 *		} WordEntryPos;
 *
 * but declared as a plain uint16; the WEP_* macros extract and update the
 * two parts (weight in the top 2 bits, position in the low 14 bits).
 */
typedef uint16 WordEntryPos;
/*
 * Accessors for a packed WordEntryPos value: the weight lives in the top
 * 2 bits (mask 0xc000) and the position in the low 14 bits (mask 0x3fff).
 *
 * WEP_SETWEIGHT and WEP_SETPOS assign to their first argument, which must
 * therefore be a modifiable lvalue; they mention it twice, so do not pass
 * an expression with side effects.  Every expansion is fully parenthesized
 * so the macros remain correct when embedded in a larger expression (an
 * unparenthesized "(x) = a | b" would let a following "+ 1" bind to b).
 */
#define WEP_GETWEIGHT(x)	( (x) >> 14 )
#define WEP_GETPOS(x)		( (x) & 0x3fff )

#define WEP_SETWEIGHT(x,v)	( (x) = ( (v) << 14 ) | ( (x) & 0x3fff ) )
#define WEP_SETPOS(x,v)		( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) )

/* Positions fit in 14 bits, so valid values are 0 .. MAXENTRYPOS-1. */
#define MAXENTRYPOS (1<<14)
/* NOTE(review): presumably the cap on positions stored per lexeme -- confirm against users. */
#define MAXNUMPOS	256
/* Clamp x to the largest representable position. */
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
/*
 * Structure of tsvector datatype:
 * 1) standard varlena header
 * 2) int4 size - number of lexemes, which is the same as the number of
 *				  entries in the WordEntry array
 * 3) Array of WordEntry - sorted array, comparison based on word's length
 *						  and strncmp(). WordEntry->pos holds the number of
 *						  bytes from the end of the WordEntry array to the
 *						  start of the corresponding lexeme.
 * 4) Lexeme storage:
 *	  SHORTALIGNED(lexeme) and position information if it exists.
 *	  Position information: the first int2 is the number of positions,
 *	  and it is followed by an array of WordEntryPos.
 */
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
2003-07-21 12:27:44 +02:00
int4 size;
char data[1];
} tsvector;
/*
 * Layout helpers for a tsvector.  In the macros below x is a pointer to the
 * start of a tsvector value and e is a pointer to one of its WordEntry items.
 */

/* Bytes occupied by the varlena header plus the int4 "size" field. */
#define DATAHDRSIZE (VARHDRSZ + sizeof(int4))
/* Total bytes needed for x WordEntry items plus lenstr bytes of lexeme storage. */
#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
/* Address of the first WordEntry. */
#define ARRPTR(x)	( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
/* Address of the lexeme storage, immediately after the WordEntry array. */
#define STRPTR(x)	( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
/*
 * Bytes of lexeme storage: total size minus header and WordEntry array.
 * The total is read with VARSIZE() because tsvector has no "len" member;
 * its varlena header field vl_len_ must not be accessed directly.
 */
#define STRSIZE(x)	( VARSIZE(x) - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )

/* Address just past entry e's lexeme, rounded up with SHORTALIGN. */
#define _POSDATAPTR(x,e)	(STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
/* Number of positions stored for e (a uint16 count), or 0 when e has none. */
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
/* Address of e's WordEntryPos array, which follows the uint16 count. */
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )

/*
 * A WordEntry paired with a pointer to WordEntryPos data.
 * NOTE(review): presumably the working form used while reading tsvector
 * input, before positions are packed after the lexeme -- confirm against
 * the callers.
 */
typedef struct
{
	WordEntry	entry;
	WordEntryPos *pos;
} WordEntryIN;
typedef struct
{
char *prsbuf;
char *word;
char *curpos;
int4 len;
int4 state;
int4 alen;
2003-08-04 02:43:34 +02:00
WordEntryPos *pos;
2003-07-21 12:27:44 +02:00
bool oprisdelim;
} TI_IN_STATE;
int4 gettoken_tsvector(TI_IN_STATE * state);
#endif