Defend against non-ASCII letters in fuzzystrmatch code.

The functions still don't behave very sanely for multibyte encodings, but at least
they won't be indexing off the ends of static arrays.
This commit is contained in:
Tom Lane 2009-04-07 15:53:54 +00:00
parent c35eb1504a
commit bb6bbc3277
1 changed files with 28 additions and 10 deletions

View File

@ -5,7 +5,7 @@
*
* Joe Conway <mail@joeconway.com>
*
* $PostgreSQL: pgsql/contrib/fuzzystrmatch/fuzzystrmatch.c,v 1.28 2009/01/01 17:23:32 momjian Exp $
* $PostgreSQL: pgsql/contrib/fuzzystrmatch/fuzzystrmatch.c,v 1.29 2009/04/07 15:53:54 tgl Exp $
* Copyright (c) 2001-2009, PostgreSQL Global Development Group
* ALL RIGHTS RESERVED;
*
@ -74,7 +74,15 @@ static void _soundex(const char *instr, char *outstr);
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
static const char *soundex_table = "01230120022455012623010202";
#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
/*
 * Translate one character into its soundex_table entry.
 *
 * The character is upper-cased first.  If the result is one of the ASCII
 * letters 'A'..'Z', the table entry at that letter's offset from 'A' is
 * returned.  Any other character is returned as-is in its upper-cased
 * form, so the table is never indexed out of bounds.
 */
static char
soundex_code(char letter)
{
	char		upper = toupper((unsigned char) letter);

	/* Non-ASCII letters have no slot in soundex_table; pass them through */
	if (upper < 'A' || upper > 'Z')
		return upper;

	return soundex_table[upper - 'A'];
}
/*
@ -143,27 +151,37 @@ static int _metaphone(char *word, int max_phonemes, char **phoned_word);
/*-- Character encoding array & accessing macros --*/
/* Stolen directly out of the book... */
char _codes[26] = {
static const char _codes[26] = {
1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0
/* a b c d e f g h i j k l m n o p q r s t u v w x y z */
};
/*
 * Fetch the _codes[] entry for character c.
 *
 * Returns 0 when c is not alphabetic according to isalpha(), or when its
 * upper-cased form is not one of the ASCII letters 'A'..'Z'.  Otherwise
 * returns the _codes[] element at the letter's offset from 'A'.
 */
static int
getcode(char c)
{
	char		upper;

	/* Non-letters never have an entry */
	if (!isalpha((unsigned char) c))
		return 0;

	upper = toupper((unsigned char) c);

	/*
	 * isalpha() can accept letters outside ASCII; those have no slot in
	 * _codes[], so only index the array for 'A'..'Z'.
	 */
	return (upper >= 'A' && upper <= 'Z') ? _codes[upper - 'A'] : 0;
}
#define ENCODE(c) (isalpha((unsigned char) (c)) ? _codes[((toupper((unsigned char) (c))) - 'A')] : 0)
#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */
#define isvowel(c) (getcode(c) & 1) /* AEIOU */
/* These letters are passed through unchanged */
#define NOCHANGE(c) (ENCODE(c) & 2) /* FJMNR */
#define NOCHANGE(c) (getcode(c) & 2) /* FJMNR */
/* These form diphthongs when preceding H */
#define AFFECTH(c) (ENCODE(c) & 4) /* CGPST */
#define AFFECTH(c) (getcode(c) & 4) /* CGPST */
/* These make C and G soft */
#define MAKESOFT(c) (ENCODE(c) & 8) /* EIY */
#define MAKESOFT(c) (getcode(c) & 8) /* EIY */
/* These prevent GH from becoming F */
#define NOGHTOF(c) (ENCODE(c) & 16) /* BDH */
#define NOGHTOF(c) (getcode(c) & 16) /* BDH */
/*