95 lines
2.4 KiB
C
95 lines
2.4 KiB
C
/*-------------------------------------------------------------------------
 *
 * kwlookup.c
 *	  Key word lookup for PostgreSQL
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/common/kwlookup.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "c.h"
|
|
|
|
#include "common/kwlookup.h"
|
|
|
|
|
|
/*
|
|
* ScanKeywordLookup - see if a given word is a keyword
|
|
*
|
|
* The list of keywords to be matched against is passed as a ScanKeywordList.
|
|
*
|
|
* Returns the keyword number (0..N-1) of the keyword, or -1 if no match.
|
|
* Callers typically use the keyword number to index into information
|
|
* arrays, but that is no concern of this code.
|
|
*
|
|
* The match is done case-insensitively. Note that we deliberately use a
|
|
* dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
|
|
* even if we are in a locale where tolower() would produce more or different
|
|
* translations. This is to conform to the SQL99 spec, which says that
|
|
* keywords are to be matched in this way even though non-keyword identifiers
|
|
* receive a different case-normalization mapping.
|
|
*/
|
|
int
|
|
ScanKeywordLookup(const char *text,
|
|
const ScanKeywordList *keywords)
|
|
{
|
|
int len,
|
|
i;
|
|
char word[NAMEDATALEN];
|
|
const char *kw_string;
|
|
const uint16 *kw_offsets;
|
|
const uint16 *low;
|
|
const uint16 *high;
|
|
|
|
len = strlen(text);
|
|
|
|
if (len > keywords->max_kw_len)
|
|
return -1; /* too long to be any keyword */
|
|
|
|
/* We assume all keywords are shorter than NAMEDATALEN. */
|
|
Assert(len < NAMEDATALEN);
|
|
|
|
/*
|
|
* Apply an ASCII-only downcasing. We must not use tolower() since it may
|
|
* produce the wrong translation in some locales (eg, Turkish).
|
|
*/
|
|
for (i = 0; i < len; i++)
|
|
{
|
|
char ch = text[i];
|
|
|
|
if (ch >= 'A' && ch <= 'Z')
|
|
ch += 'a' - 'A';
|
|
word[i] = ch;
|
|
}
|
|
word[len] = '\0';
|
|
|
|
/*
|
|
* Now do a binary search using plain strcmp() comparison.
|
|
*/
|
|
kw_string = keywords->kw_string;
|
|
kw_offsets = keywords->kw_offsets;
|
|
low = kw_offsets;
|
|
high = kw_offsets + (keywords->num_keywords - 1);
|
|
while (low <= high)
|
|
{
|
|
const uint16 *middle;
|
|
int difference;
|
|
|
|
middle = low + (high - low) / 2;
|
|
difference = strcmp(kw_string + *middle, word);
|
|
if (difference == 0)
|
|
return middle - kw_offsets;
|
|
else if (difference < 0)
|
|
low = middle + 1;
|
|
else
|
|
high = middle - 1;
|
|
}
|
|
|
|
return -1;
|
|
}
|