The data structure used in unaccent is a trie, not a suffix tree.

Fix the terminology used in variable names, struct names, and comments.

Alexander Korotkov
This commit is contained in:
Heikki Linnakangas 2013-05-08 20:57:42 +03:00
parent 2ffa66f497
commit 4b06c1820a

View File

@ -23,30 +23,29 @@
PG_MODULE_MAGIC; PG_MODULE_MAGIC;
/* /*
* Unaccent dictionary uses uncompressed suffix tree to find a * Unaccent dictionary uses a trie to find a character to replace. Each node of
* character to replace. Each node of tree is an array of * the trie is an array of 256 TrieChar structs (n-th element of array
* SuffixChar struct with length = 256 (n-th element of array
* corresponds to byte) * corresponds to byte)
*/ */
typedef struct SuffixChar typedef struct TrieChar
{ {
struct SuffixChar *nextChar; struct TrieChar *nextChar;
char *replaceTo; char *replaceTo;
int replacelen; int replacelen;
} SuffixChar; } TrieChar;
/* /*
* placeChar - put str into tree's structure, byte by byte. * placeChar - put str into trie's structure, byte by byte.
*/ */
static SuffixChar * static TrieChar *
placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen) placeChar(TrieChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen)
{ {
SuffixChar *curnode; TrieChar *curnode;
if (!node) if (!node)
{ {
node = palloc(sizeof(SuffixChar) * 256); node = palloc(sizeof(TrieChar) * 256);
memset(node, 0, sizeof(SuffixChar) * 256); memset(node, 0, sizeof(TrieChar) * 256);
} }
curnode = node + *str; curnode = node + *str;
@ -71,13 +70,14 @@ placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int
} }
/* /*
* initSuffixTree - create suffix tree from file. Function converts * initTrie - create trie from file.
* UTF8-encoded file into current encoding. *
* Function converts UTF8-encoded file into current encoding.
*/ */
static SuffixChar * static TrieChar *
initSuffixTree(char *filename) initTrie(char *filename)
{ {
SuffixChar *volatile rootSuffixTree = NULL; TrieChar *volatile rootTrie = NULL;
MemoryContext ccxt = CurrentMemoryContext; MemoryContext ccxt = CurrentMemoryContext;
tsearch_readline_state trst; tsearch_readline_state trst;
volatile bool skip; volatile bool skip;
@ -161,7 +161,7 @@ initSuffixTree(char *filename)
} }
if (state >= 3) if (state >= 3)
rootSuffixTree = placeChar(rootSuffixTree, rootTrie = placeChar(rootTrie,
(unsigned char *) src, srclen, (unsigned char *) src, srclen,
trg, trglen); trg, trglen);
@ -192,14 +192,14 @@ initSuffixTree(char *filename)
tsearch_readline_end(&trst); tsearch_readline_end(&trst);
return rootSuffixTree; return rootTrie;
} }
/* /*
* findReplaceTo - find multibyte character in tree * findReplaceTo - find multibyte character in trie
*/ */
static SuffixChar * static TrieChar *
findReplaceTo(SuffixChar *node, unsigned char *src, int srclen) findReplaceTo(TrieChar *node, unsigned char *src, int srclen)
{ {
while (node) while (node)
{ {
@ -221,7 +221,7 @@ Datum
unaccent_init(PG_FUNCTION_ARGS) unaccent_init(PG_FUNCTION_ARGS)
{ {
List *dictoptions = (List *) PG_GETARG_POINTER(0); List *dictoptions = (List *) PG_GETARG_POINTER(0);
SuffixChar *rootSuffixTree = NULL; TrieChar *rootTrie = NULL;
bool fileloaded = false; bool fileloaded = false;
ListCell *l; ListCell *l;
@ -235,7 +235,7 @@ unaccent_init(PG_FUNCTION_ARGS)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple Rules parameters"))); errmsg("multiple Rules parameters")));
rootSuffixTree = initSuffixTree(defGetString(defel)); rootTrie = initTrie(defGetString(defel));
fileloaded = true; fileloaded = true;
} }
else else
@ -254,7 +254,7 @@ unaccent_init(PG_FUNCTION_ARGS)
errmsg("missing Rules parameter"))); errmsg("missing Rules parameter")));
} }
PG_RETURN_POINTER(rootSuffixTree); PG_RETURN_POINTER(rootTrie);
} }
PG_FUNCTION_INFO_V1(unaccent_lexize); PG_FUNCTION_INFO_V1(unaccent_lexize);
@ -262,21 +262,21 @@ Datum unaccent_lexize(PG_FUNCTION_ARGS);
Datum Datum
unaccent_lexize(PG_FUNCTION_ARGS) unaccent_lexize(PG_FUNCTION_ARGS)
{ {
SuffixChar *rootSuffixTree = (SuffixChar *) PG_GETARG_POINTER(0); TrieChar *rootTrie = (TrieChar *) PG_GETARG_POINTER(0);
char *srcchar = (char *) PG_GETARG_POINTER(1); char *srcchar = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2); int32 len = PG_GETARG_INT32(2);
char *srcstart, char *srcstart,
*trgchar = NULL; *trgchar = NULL;
int charlen; int charlen;
TSLexeme *res = NULL; TSLexeme *res = NULL;
SuffixChar *node; TrieChar *node;
srcstart = srcchar; srcstart = srcchar;
while (srcchar - srcstart < len) while (srcchar - srcstart < len)
{ {
charlen = pg_mblen(srcchar); charlen = pg_mblen(srcchar);
node = findReplaceTo(rootSuffixTree, (unsigned char *) srcchar, charlen); node = findReplaceTo(rootTrie, (unsigned char *) srcchar, charlen);
if (node && node->replaceTo) if (node && node->replaceTo)
{ {
if (!res) if (!res)