mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-09-27 23:51:48 +02:00
The data structure used in unaccent is a trie, not a suffix tree.
Fix the term used in variable names, struct names, and comments. Alexander Korotkov
This commit is contained in:
parent
2ffa66f497
commit
4b06c1820a
@ -23,30 +23,29 @@
|
|||||||
PG_MODULE_MAGIC;
|
PG_MODULE_MAGIC;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Unaccent dictionary uses uncompressed suffix tree to find a
|
* Unaccent dictionary uses a trie to find a character to replace. Each node of
|
||||||
* character to replace. Each node of tree is an array of
|
* the trie is an array of 256 TrieChar structs (n-th element of array
|
||||||
* SuffixChar struct with length = 256 (n-th element of array
|
|
||||||
* corresponds to byte)
|
* corresponds to byte)
|
||||||
*/
|
*/
|
||||||
typedef struct SuffixChar
|
typedef struct TrieChar
|
||||||
{
|
{
|
||||||
struct SuffixChar *nextChar;
|
struct TrieChar *nextChar;
|
||||||
char *replaceTo;
|
char *replaceTo;
|
||||||
int replacelen;
|
int replacelen;
|
||||||
} SuffixChar;
|
} TrieChar;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* placeChar - put str into tree's structure, byte by byte.
|
* placeChar - put str into trie's structure, byte by byte.
|
||||||
*/
|
*/
|
||||||
static SuffixChar *
|
static TrieChar *
|
||||||
placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen)
|
placeChar(TrieChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen)
|
||||||
{
|
{
|
||||||
SuffixChar *curnode;
|
TrieChar *curnode;
|
||||||
|
|
||||||
if (!node)
|
if (!node)
|
||||||
{
|
{
|
||||||
node = palloc(sizeof(SuffixChar) * 256);
|
node = palloc(sizeof(TrieChar) * 256);
|
||||||
memset(node, 0, sizeof(SuffixChar) * 256);
|
memset(node, 0, sizeof(TrieChar) * 256);
|
||||||
}
|
}
|
||||||
|
|
||||||
curnode = node + *str;
|
curnode = node + *str;
|
||||||
@ -71,13 +70,14 @@ placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* initSuffixTree - create suffix tree from file. Function converts
|
* initTrie - create trie from file.
|
||||||
* UTF8-encoded file into current encoding.
|
*
|
||||||
|
* Function converts UTF8-encoded file into current encoding.
|
||||||
*/
|
*/
|
||||||
static SuffixChar *
|
static TrieChar *
|
||||||
initSuffixTree(char *filename)
|
initTrie(char *filename)
|
||||||
{
|
{
|
||||||
SuffixChar *volatile rootSuffixTree = NULL;
|
TrieChar *volatile rootTrie = NULL;
|
||||||
MemoryContext ccxt = CurrentMemoryContext;
|
MemoryContext ccxt = CurrentMemoryContext;
|
||||||
tsearch_readline_state trst;
|
tsearch_readline_state trst;
|
||||||
volatile bool skip;
|
volatile bool skip;
|
||||||
@ -161,7 +161,7 @@ initSuffixTree(char *filename)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (state >= 3)
|
if (state >= 3)
|
||||||
rootSuffixTree = placeChar(rootSuffixTree,
|
rootTrie = placeChar(rootTrie,
|
||||||
(unsigned char *) src, srclen,
|
(unsigned char *) src, srclen,
|
||||||
trg, trglen);
|
trg, trglen);
|
||||||
|
|
||||||
@ -192,14 +192,14 @@ initSuffixTree(char *filename)
|
|||||||
|
|
||||||
tsearch_readline_end(&trst);
|
tsearch_readline_end(&trst);
|
||||||
|
|
||||||
return rootSuffixTree;
|
return rootTrie;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* findReplaceTo - find multibyte character in tree
|
* findReplaceTo - find multibyte character in trie
|
||||||
*/
|
*/
|
||||||
static SuffixChar *
|
static TrieChar *
|
||||||
findReplaceTo(SuffixChar *node, unsigned char *src, int srclen)
|
findReplaceTo(TrieChar *node, unsigned char *src, int srclen)
|
||||||
{
|
{
|
||||||
while (node)
|
while (node)
|
||||||
{
|
{
|
||||||
@ -221,7 +221,7 @@ Datum
|
|||||||
unaccent_init(PG_FUNCTION_ARGS)
|
unaccent_init(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
List *dictoptions = (List *) PG_GETARG_POINTER(0);
|
List *dictoptions = (List *) PG_GETARG_POINTER(0);
|
||||||
SuffixChar *rootSuffixTree = NULL;
|
TrieChar *rootTrie = NULL;
|
||||||
bool fileloaded = false;
|
bool fileloaded = false;
|
||||||
ListCell *l;
|
ListCell *l;
|
||||||
|
|
||||||
@ -235,7 +235,7 @@ unaccent_init(PG_FUNCTION_ARGS)
|
|||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
errmsg("multiple Rules parameters")));
|
errmsg("multiple Rules parameters")));
|
||||||
rootSuffixTree = initSuffixTree(defGetString(defel));
|
rootTrie = initTrie(defGetString(defel));
|
||||||
fileloaded = true;
|
fileloaded = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -254,7 +254,7 @@ unaccent_init(PG_FUNCTION_ARGS)
|
|||||||
errmsg("missing Rules parameter")));
|
errmsg("missing Rules parameter")));
|
||||||
}
|
}
|
||||||
|
|
||||||
PG_RETURN_POINTER(rootSuffixTree);
|
PG_RETURN_POINTER(rootTrie);
|
||||||
}
|
}
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(unaccent_lexize);
|
PG_FUNCTION_INFO_V1(unaccent_lexize);
|
||||||
@ -262,21 +262,21 @@ Datum unaccent_lexize(PG_FUNCTION_ARGS);
|
|||||||
Datum
|
Datum
|
||||||
unaccent_lexize(PG_FUNCTION_ARGS)
|
unaccent_lexize(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
SuffixChar *rootSuffixTree = (SuffixChar *) PG_GETARG_POINTER(0);
|
TrieChar *rootTrie = (TrieChar *) PG_GETARG_POINTER(0);
|
||||||
char *srcchar = (char *) PG_GETARG_POINTER(1);
|
char *srcchar = (char *) PG_GETARG_POINTER(1);
|
||||||
int32 len = PG_GETARG_INT32(2);
|
int32 len = PG_GETARG_INT32(2);
|
||||||
char *srcstart,
|
char *srcstart,
|
||||||
*trgchar = NULL;
|
*trgchar = NULL;
|
||||||
int charlen;
|
int charlen;
|
||||||
TSLexeme *res = NULL;
|
TSLexeme *res = NULL;
|
||||||
SuffixChar *node;
|
TrieChar *node;
|
||||||
|
|
||||||
srcstart = srcchar;
|
srcstart = srcchar;
|
||||||
while (srcchar - srcstart < len)
|
while (srcchar - srcstart < len)
|
||||||
{
|
{
|
||||||
charlen = pg_mblen(srcchar);
|
charlen = pg_mblen(srcchar);
|
||||||
|
|
||||||
node = findReplaceTo(rootSuffixTree, (unsigned char *) srcchar, charlen);
|
node = findReplaceTo(rootTrie, (unsigned char *) srcchar, charlen);
|
||||||
if (node && node->replaceTo)
|
if (node && node->replaceTo)
|
||||||
{
|
{
|
||||||
if (!res)
|
if (!res)
|
||||||
|
Loading…
Reference in New Issue
Block a user