[ Patch comments in three pieces.]

Attached is a pacth against 7.2 which adds locale awareness to the
character classes of the regular expression engine.

...

> > I still think the xdigit class could be handled the same way the digit
> > class is (by enumeration rather than using the isxdigit function). That
> > saves you a cicle, and I don't think there's any loss.
>
> In fact, I will email you when I apply the original patch.

I miss that case :-(. Here is the pached patch.

...

Here is a patch which addresses Tatsuo's concerns (it does return an
static struct instead of constructing it).
This commit is contained in:
Bruce Momjian 2002-04-24 01:51:11 +00:00
parent 450e728d24
commit f71c924f10
1 changed files with 92 additions and 2 deletions

View File

@ -47,8 +47,17 @@
#include "regex/regex.h"
#include "regex/utils.h"
#include "regex/regex2.h"
#include "regex/cclass.h"
#include "regex/cname.h"
#include <locale.h>
struct cclass
{
char *name;
char *chars;
char *multis;
};
static struct cclass* cclasses = NULL;
static struct cclass* cclass_init(void);
/*
* parse structure, passed up and down to avoid global variables and
@ -174,6 +183,9 @@ pg95_regcomp(regex_t *preg, const char *pattern, int cflags)
pg_wchar *wcp;
#endif
if ( cclasses == NULL )
cclasses = cclass_init();
#ifdef REDEBUG
#define GOODFLAGS(f) (f)
#else
@ -884,7 +896,7 @@ p_b_cclass(struct parse * p, cset *cs)
struct cclass *cp;
size_t len;
char *u;
char c;
unsigned char c;
while (MORE() && pg_isalpha(PEEK()))
NEXT();
@ -1716,3 +1728,81 @@ pg_islower(int c)
return (islower((unsigned char) c));
#endif
}
static struct cclass *
cclass_init(void)
{
static struct cclass cclasses_C[] = {
{ "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", "" },
{ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "" },
{ "blank", " \t", "" },
{ "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177", "" },
{ "digit", "0123456789", "" },
{ "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" },
{ "lower", "abcdefghijklmnopqrstuvwxyz", "" },
{ "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", "" },
{ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" },
{ "space", "\t\n\v\f\r ", "" },
{ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "" },
{ "xdigit", "0123456789ABCDEFabcdef", "" },
{ NULL, NULL, "" }
};
struct cclass *cp = NULL;
struct cclass *classes = NULL;
struct cclass_factory
{
char *name;
int (*func)(int);
char *chars;
} cclass_factories [] =
{
{ "alnum", isalnum, NULL },
{ "alpha", isalpha, NULL },
{ "blank", NULL, " \t" },
{ "cntrl", iscntrl, NULL },
{ "digit", NULL, "0123456789" },
{ "graph", isgraph, NULL },
{ "lower", islower, NULL },
{ "print", isprint, NULL },
{ "punct", ispunct, NULL },
{ "space", NULL, "\t\n\v\f\r " },
{ "upper", isupper, NULL },
{ "xdigit", NULL, "0123456789ABCDEFabcdef" },
{ NULL, NULL, NULL }
};
struct cclass_factory *cf = NULL;
if ( strcmp( setlocale( LC_CTYPE, NULL ), "C" ) == 0 )
return cclasses_C;
classes = malloc(sizeof(struct cclass) * (sizeof(cclass_factories) / sizeof(struct cclass_factory)));
if (classes == NULL)
elog(ERROR,"cclass_init: out of memory");
cp = classes;
for(cf = cclass_factories; cf->name != NULL; cf++)
{
cp->name = strdup(cf->name);
if ( cf->chars )
cp->chars = strdup(cf->chars);
else
{
int x = 0, y = 0;
cp->chars = malloc(sizeof(char) * 256);
if (cp->chars == NULL)
elog(ERROR,"cclass_init: out of memory");
for (x = 0; x < 256; x++)
{
if((cf->func)(x))
*(cp->chars + y++) = x;
}
*(cp->chars + y) = '\0';
}
cp->multis = "";
cp++;
}
cp->name = cp->chars = NULL;
cp->multis = "";
return classes;
}