postgresql/src/backend/regex/regc_locale.c

/* 
 * regc_locale.c --
 *
 *	This file contains locale-specific regexp routines.
 *	This file is #included by regcomp.c.
 *
 * Copyright (c) 1998 by Scriptics Corporation.
 *
 * This software is copyrighted by the Regents of the University of
 * California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
 * Corporation and other parties.  The following terms apply to all files
 * associated with the software unless explicitly disclaimed in
 * individual files.
 * 
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 * 
 * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
 * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
 * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
 * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
 * MODIFICATIONS.
 * 
 * GOVERNMENT USE: If you are acquiring this software on behalf of the
 * U.S. government, the Government shall have only "Restricted Rights"
 * in the software and related documentation as defined in the Federal 
 * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
 * are acquiring the software on behalf of the Department of Defense, the
 * software shall be classified as "Commercial Computer Software" and the
 * Government shall have only "Restricted Rights" as defined in Clause
 * 252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
 * authors grant the U.S. Government and others acting in its behalf
 * permission to use and distribute the software in accordance with the
 * terms specified in this license. 
 *
 * $Header: /cvsroot/pgsql/src/backend/regex/regc_locale.c,v 1.1 2003/02/05 17:41:32 tgl Exp $
 */

/*
 * ASCII character-name table
 *
 * Each entry pairs a symbolic name with the single character code it
 * stands for.  Several names may map to the same code (for example "BEL"
 * and "alert", or "period" and "full-stop").  The table ends with a
 * {NULL, 0} sentinel; element() walks it linearly until it reaches that
 * sentinel, so the sentinel must remain the last entry.
 */

static struct cname {
    char *name;		/* symbolic name, NUL-terminated */
    char code;		/* character code the name denotes */
} cnames[] = {
    /* control characters, by abbreviation and by descriptive alias */
    {"NUL",		'\0'},
    {"SOH",		'\001'},
    {"STX",		'\002'},
    {"ETX",		'\003'},
    {"EOT",		'\004'},
    {"ENQ",		'\005'},
    {"ACK",		'\006'},
    {"BEL",		'\007'},
    {"alert",		'\007'},
    {"BS",		'\010'},
    {"backspace",	'\b'},
    {"HT",		'\011'},
    {"tab",		'\t'},
    {"LF",		'\012'},
    {"newline",		'\n'},
    {"VT",		'\013'},
    {"vertical-tab",	'\v'},
    {"FF",		'\014'},
    {"form-feed",	'\f'},
    {"CR",		'\015'},
    {"carriage-return",	'\r'},
    {"SO",		'\016'},
    {"SI",		'\017'},
    {"DLE",		'\020'},
    {"DC1",		'\021'},
    {"DC2",		'\022'},
    {"DC3",		'\023'},
    {"DC4",		'\024'},
    {"NAK",		'\025'},
    {"SYN",		'\026'},
    {"ETB",		'\027'},
    {"CAN",		'\030'},
    {"EM",		'\031'},
    {"SUB",		'\032'},
    {"ESC",		'\033'},
    {"IS4",		'\034'},
    {"FS",		'\034'},
    {"IS3",		'\035'},
    {"GS",		'\035'},
    {"IS2",		'\036'},
    {"RS",		'\036'},
    {"IS1",		'\037'},
    {"US",		'\037'},
    /* space, punctuation and digits */
    {"space",		' '},
    {"exclamation-mark",'!'},
    {"quotation-mark",	'"'},
    {"number-sign",	'#'},
    {"dollar-sign",	'$'},
    {"percent-sign",	'%'},
    {"ampersand",	'&'},
    {"apostrophe",	'\''},
    {"left-parenthesis",'('},
    {"right-parenthesis", ')'},
    {"asterisk",	'*'},
    {"plus-sign",	'+'},
    {"comma",		','},
    {"hyphen",		'-'},
    {"hyphen-minus",	'-'},
    {"period",		'.'},
    {"full-stop",	'.'},
    {"slash",		'/'},
    {"solidus",		'/'},
    {"zero",		'0'},
    {"one",		'1'},
    {"two",		'2'},
    {"three",		'3'},
    {"four",		'4'},
    {"five",		'5'},
    {"six",		'6'},
    {"seven",		'7'},
    {"eight",		'8'},
    {"nine",		'9'},
    {"colon",		':'},
    {"semicolon",	';'},
    {"less-than-sign",	'<'},
    {"equals-sign",	'='},
    {"greater-than-sign", '>'},
    {"question-mark",	'?'},
    {"commercial-at",	'@'},
    {"left-square-bracket", '['},
    {"backslash",	'\\'},
    {"reverse-solidus",	'\\'},
    {"right-square-bracket", ']'},
    {"circumflex",	'^'},
    {"circumflex-accent", '^'},
    {"underscore",	'_'},
    {"low-line",	'_'},
    {"grave-accent",	'`'},
    {"left-brace",	'{'},
    {"left-curly-bracket", '{'},
    {"vertical-line",	'|'},
    {"right-brace",	'}'},
    {"right-curly-bracket", '}'},
    {"tilde",		'~'},
    {"DEL",		'\177'},
    /* end-of-table sentinel */
    {NULL,		0}
};

/*
 * some ctype functions with non-ascii-char guard
 */
/*
 * pg_isdigit - isdigit() limited to codes in 0..UCHAR_MAX
 *
 * Returns 1 when c is within that range and isdigit() accepts it,
 * otherwise 0.  Codes outside the range are never handed to isdigit().
 */
static int
pg_isdigit(pg_wchar c)
{
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return isdigit((unsigned char) c) ? 1 : 0;
}

/*
 * pg_isalpha - isalpha() limited to codes in 0..UCHAR_MAX
 *
 * Returns 1 when c is within that range and isalpha() accepts it,
 * otherwise 0.  Codes outside the range are never handed to isalpha().
 */
static int
pg_isalpha(pg_wchar c)
{
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return isalpha((unsigned char) c) ? 1 : 0;
}

/*
 * pg_isalnum - isalnum() limited to codes in 0..UCHAR_MAX
 *
 * Returns 1 when c is within that range and isalnum() accepts it,
 * otherwise 0.  Codes outside the range are never handed to isalnum().
 */
static int
pg_isalnum(pg_wchar c)
{
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return isalnum((unsigned char) c) ? 1 : 0;
}

/*
 * pg_isupper - isupper() limited to codes in 0..UCHAR_MAX
 *
 * Returns 1 when c is within that range and isupper() accepts it,
 * otherwise 0.  Codes outside the range are never handed to isupper().
 */
static int
pg_isupper(pg_wchar c)
{
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return isupper((unsigned char) c) ? 1 : 0;
}

/*
 * pg_islower - islower() limited to codes in 0..UCHAR_MAX
 *
 * Returns 1 when c is within that range and islower() accepts it,
 * otherwise 0.  Codes outside the range are never handed to islower().
 */
static int
pg_islower(pg_wchar c)
{
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return islower((unsigned char) c) ? 1 : 0;
}

/*
 * pg_isgraph - isgraph() limited to codes in 0..UCHAR_MAX
 *
 * Returns 1 when c is within that range and isgraph() accepts it,
 * otherwise 0.  Codes outside the range are never handed to isgraph().
 */
static int
pg_isgraph(pg_wchar c)
{
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return isgraph((unsigned char) c) ? 1 : 0;
}

/*
 * pg_ispunct - ispunct() limited to codes in 0..UCHAR_MAX
 *
 * Returns 1 when c is within that range and ispunct() accepts it,
 * otherwise 0.  Codes outside the range are never handed to ispunct().
 */
static int
pg_ispunct(pg_wchar c)
{
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return ispunct((unsigned char) c) ? 1 : 0;
}

/*
 * pg_isspace - isspace() limited to codes in 0..UCHAR_MAX
 *
 * Returns 1 when c is within that range and isspace() accepts it,
 * otherwise 0.  Codes outside the range are never handed to isspace().
 */
static int
pg_isspace(pg_wchar c)
{
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return isspace((unsigned char) c) ? 1 : 0;
}

/*
 * pg_toupper - toupper() limited to codes in 0..UCHAR_MAX
 *
 * Codes outside that range are returned unchanged; codes inside it are
 * mapped through toupper().
 */
static pg_wchar
pg_toupper(pg_wchar c)
{
	if (c < 0 || c > UCHAR_MAX)
		return c;
	return toupper((unsigned char) c);
}

/*
 * pg_tolower - tolower() limited to codes in 0..UCHAR_MAX
 *
 * Codes outside that range are returned unchanged; codes inside it are
 * mapped through tolower().
 */
static pg_wchar
pg_tolower(pg_wchar c)
{
	if (c < 0 || c > UCHAR_MAX)
		return c;
	return tolower((unsigned char) c);
}


/*
 * nmcces - count the distinct multi-character collating elements (MCCEs)
 *
 * No MCCEs are defined at the moment, so the answer is always zero and
 * the context argument is not consulted.
 */
static int
nmcces(struct vars *v)
{
    const int mcce_count = 0;

    return mcce_count;
}

/*
 * nleaders - count the chrs that can start a multi-character collating
 * element
 *
 * Always zero here; the context argument is not consulted.
 */
static int
nleaders(struct vars *v)
{
    const int leader_count = 0;

    return leader_count;
}

/*
 * allmcces - produce a cvec holding every MCCE of the locale
 *
 * The caller's cvec is passed through clearcvec() and that result is
 * handed back; nothing is added to it.
 */
static struct cvec *
allmcces(struct vars *v,			/* context */
		 struct cvec *cv)			/* caller-supplied vector, expected to be big enough */
{
    struct cvec *emptied = clearcvec(cv);

    return emptied;
}

/*
 * element - translate a collating-element name into a celt
 *
 * A name of exactly one chr denotes that chr.  Longer names are looked up
 * in cnames[]; a name that is not found raises REG_ECOLLATE and yields 0.
 * Any multi-chr name causes REG_ULOCALE to be noted, whether or not the
 * lookup succeeds.
 */
static celt
element(struct vars *v,			/* context */
		chr *startp,			/* first chr of the name */
		chr *endp)				/* one past the last chr of the name */
{
    size_t namelen;
    struct cname *entry;

    assert(startp < endp);
    namelen = endp - startp;

    /* single-chr names are taken literally */
    if (namelen == 1)
	return *startp;

    NOTE(REG_ULOCALE);

    /* linear scan up to the NULL-name sentinel */
    for (entry = cnames; entry->name != NULL; entry++) {
	if (strlen(entry->name) != namelen)
	    continue;
	if (pg_char_and_wchar_strncmp(entry->name, startp, namelen) == 0)
	    return CHR(entry->code);
    }

    /* no table entry matched */
    ERR(REG_ECOLLATE);
    return 0;
}

/*
 * range - build the cvec for a range a..b, after checking it is legal
 *
 * A range whose endpoints differ and whose start is not before() its end
 * raises REG_ERANGE and yields NULL.  A case-sensitive range is stored as
 * one cvec range.  A case-independent range is expanded chr by chr, adding
 * each chr plus any lower-/upper-case counterpart that differs from it.
 */
static struct cvec *
range(struct vars *v,			/* context */
	  celt a,				/* range start */
	  celt b,				/* range end, might equal a */
	  int cases)				/* case-independent? */
{
    struct cvec *cv;
    celt cur;
    celt lower;
    celt upper;
    int capacity;

    if (a != b && !before(a, b)) {
	ERR(REG_ERANGE);
	return NULL;
    }

    if (cases) {
	/*
	 * Deciding when a cvec range is usable under case folding is hard,
	 * so every chr is listed individually.  Reserve room for two case
	 * variants of each chr, plus a little slack for title-case forms.
	 */
	capacity = (b - a + 1)*2 + 4;

	cv = getcvec(v, capacity, 0, 0);
	NOERRN();

	cur = a;
	while (cur <= b) {
	    addchr(cv, cur);
	    lower = pg_tolower((chr)cur);
	    if (lower != cur) {
		addchr(cv, lower);
	    }
	    upper = pg_toupper((chr)cur);
	    if (upper != cur) {
		addchr(cv, upper);
	    }
	    cur++;
	}
	return cv;
    }

    /* case-sensitive: a single range entry suffices */
    cv = getcvec(v, 0, 1, 0);
    NOERRN();
    addrange(cv, a, b);
    return cv;
}

/*
 * before - does celt x precede celt y, for range-legality purposes?
 *
 * With no MCCEs to worry about this is a plain numeric comparison.
 */
static int				/* predicate: 1 if x < y, else 0 */
before(celt x, celt y)
{
    return (x < y) ? 1 : 0;
}

/*
 * eclass - supply cvec for an equivalence class
 *
 * Must include case counterparts on request.
 *
 * With REG_FAKE set and c == 'x', a made-up class {x, y} (plus {X, Y}
 * when cases is true) is returned for testing.  Otherwise the class holds
 * just c, or all case counterparts of c when cases is true.
 *
 * Returns NULL, with REG_ESPACE raised, if no cvec could be obtained.
 */
static struct cvec *
eclass(struct vars *v,			/* context */
	   celt c,					/* Collating element representing
								 * the equivalence class. */
	   int cases)				/* all cases? */
{
    struct cvec *cv;

    /* crude fake equivalence class for testing */
    if ((v->cflags&REG_FAKE) && c == 'x') {
	cv = getcvec(v, 4, 0, 0);
	if (cv == NULL) {
	    /* don't write through a missing vector */
	    ERR(REG_ESPACE);
	    return NULL;
	}
	addchr(cv, (chr)'x');
	addchr(cv, (chr)'y');
	if (cases) {
	    addchr(cv, (chr)'X');
	    addchr(cv, (chr)'Y');
	}
	return cv;
    }

    /* otherwise, none */
    if (cases) {
	return allcases(v, c);
    }
    cv = getcvec(v, 1, 0, 0);
    if (cv == NULL) {
	/* a run-time check, so it also protects builds without assert() */
	ERR(REG_ESPACE);
	return NULL;
    }
    addchr(cv, (chr)c);
    return cv;
}

/*
 * cclass - supply cvec for a character class
 *
 * Must include case counterparts on request.
 *
 * The name in [startp, endp) must be one of: alnum, alpha, ascii, blank,
 * cntrl, digit, graph, lower, print, punct, space, upper, xdigit.  An
 * unknown name raises REG_ECTYPE and returns NULL.  When cases is true,
 * "lower" and "upper" are both treated as "alpha".
 *
 * Returns NULL, with REG_ESPACE raised, if no cvec could be obtained.
 */
static struct cvec *
cclass(struct vars *v,			/* context */
	   chr *startp,			/* where the name starts */
	   chr *endp,				/* just past the end of the name */
	   int cases)				/* case-independent? */
{
    size_t len;
    struct cvec *cv = NULL;
    char **namePtr;
    int i, index;

    /*
     * The following arrays define the valid character class names.
     * classNames[] and enum classes must list the classes in the same
     * order, because the array position is used as the enum value.
     */

    static char *classNames[] = {
	"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
	"lower", "print", "punct", "space", "upper", "xdigit", NULL
    };

    enum classes {
	CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
	CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
    };

    /*
     * Map the name to the corresponding enumerated value.
     */
    len = endp - startp;
    index = -1;
    for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
	if (strlen(*namePtr) == len &&
		pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0) {
	    index = i;
	    break;
	}
    }
    if (index == -1) {
	ERR(REG_ECTYPE);
	return NULL;
    }

    /*
     * Remap lower and upper to alpha if the match is case insensitive.
     */

    if (cases &&
		((enum classes) index == CC_LOWER ||
		 (enum classes) index == CC_UPPER))
		index = (int) CC_ALPHA;

    /*
     * Now compute the character class contents.
     *
     * For the moment, assume that only char codes < 256 can be in these
     * classes.
     *
     * Every case checks the getcvec() result before using it; a NULL
     * result falls through to the REG_ESPACE report after the switch.
     */

    switch((enum classes) index) {
    case CC_PRINT:
	/*
	 * Printable characters include punctuation and space, not just
	 * alphanumerics, so this class gets its own isprint() scan.
	 */
	cv = getcvec(v, UCHAR_MAX, 0, 0);
	if (cv) {
	    for (i=0 ; i<= UCHAR_MAX ; i++) {
		if (isprint((unsigned char) i))
		    addchr(cv, (chr) i);
	    }
	}
	break;
    case CC_ALNUM:
	cv = getcvec(v, UCHAR_MAX, 1, 0);
	if (cv) {
	    for (i=0 ; i<= UCHAR_MAX ; i++) {
		if (pg_isalpha((chr) i))
		    addchr(cv, (chr) i);
	    }
	    addrange(cv, (chr) '0', (chr) '9');
	}
	break;
    case CC_ALPHA:
	cv = getcvec(v, UCHAR_MAX, 0, 0);
	if (cv) {
	    for (i=0 ; i<= UCHAR_MAX ; i++) {
		if (pg_isalpha((chr) i))
		    addchr(cv, (chr) i);
	    }
	}
	break;
    case CC_ASCII:
	cv = getcvec(v, 0, 1, 0);
	if (cv) {
	    addrange(cv, 0, 0x7f);
	}
	break;
    case CC_BLANK:
	cv = getcvec(v, 2, 0, 0);
	if (cv) {
	    addchr(cv, '\t');
	    addchr(cv, ' ');
	}
	break;
    case CC_CNTRL:
	cv = getcvec(v, 0, 2, 0);
	if (cv) {
	    addrange(cv, 0x0, 0x1f);
	    addrange(cv, 0x7f, 0x9f);
	}
	break;
    case CC_DIGIT:
	cv = getcvec(v, 0, 1, 0);
	if (cv) {
	    addrange(cv, (chr) '0', (chr) '9');
	}
	break;
    case CC_PUNCT:
	cv = getcvec(v, UCHAR_MAX, 0, 0);
	if (cv) {
	    for (i=0 ; i<= UCHAR_MAX ; i++) {
		if (pg_ispunct((chr) i))
		    addchr(cv, (chr) i);
	    }
	}
	break;
    case CC_XDIGIT:
	cv = getcvec(v, 0, 3, 0);
	if (cv) {
	    addrange(cv, '0', '9');
	    addrange(cv, 'a', 'f');
	    addrange(cv, 'A', 'F');
	}
	break;
    case CC_SPACE:
	cv = getcvec(v, UCHAR_MAX, 0, 0);
	if (cv) {
	    for (i=0 ; i<= UCHAR_MAX ; i++) {
		if (pg_isspace((chr) i))
		    addchr(cv, (chr) i);
	    }
	}
	break;
    case CC_LOWER:
	cv = getcvec(v, UCHAR_MAX, 0, 0);
	if (cv) {
	    for (i=0 ; i<= UCHAR_MAX ; i++) {
		if (pg_islower((chr) i))
		    addchr(cv, (chr) i);
	    }
	}
	break;
    case CC_UPPER:
	cv = getcvec(v, UCHAR_MAX, 0, 0);
	if (cv) {
	    for (i=0 ; i<= UCHAR_MAX ; i++) {
		if (pg_isupper((chr) i))
		    addchr(cv, (chr) i);
	    }
	}
	break;
    case CC_GRAPH:
	cv = getcvec(v, UCHAR_MAX, 0, 0);
	if (cv) {
	    for (i=0 ; i<= UCHAR_MAX ; i++) {
		if (pg_isgraph((chr) i))
		    addchr(cv, (chr) i);
	    }
	}
	break;
    }
    if (cv == NULL) {
	ERR(REG_ESPACE);
    }
    return cv;
}

/*
 * allcases - supply cvec for all case counterparts of a chr (including itself)
 *
 * This is a shortcut, preferably an efficient one, for simple characters;
 * messy cases are done via range().
 *
 * The result holds the lower-case form of pc, plus the upper-case form
 * when that differs.  Returns NULL, with REG_ESPACE raised, if no cvec
 * could be obtained.
 */
static struct cvec *
allcases(struct vars *v,			/* context */
		 chr pc)				/* character to get case equivs of */
{
    struct cvec *cv;
    chr lc, uc;

    lc = pg_tolower(pc);
    uc = pg_toupper(pc);

    cv = getcvec(v, 2, 0, 0);
    if (cv == NULL) {
	/* don't write through a missing vector */
	ERR(REG_ESPACE);
	return NULL;
    }
    addchr(cv, lc);
    if (lc != uc) {
	addchr(cv, uc);
    }
    return cv;
}

/*
 * cmp - compare two chr substrings of a given length
 *
 * Used for backrefs, so it should be cheap.  Only equal/unequal is
 * reported.  The length is exact: embedded NULs do not end the
 * comparison.
 */
static int				/* 0 for equal, nonzero for unequal */
cmp(const chr *x, const chr *y,			/* strings to compare */
    size_t len)				/* exact length of comparison */
{
    size_t nbytes = len*sizeof(chr);

    return memcmp(VS(x), VS(y), nbytes);
}

/*
 * casecmp - compare two chr substrings, ignoring case
 *
 * Used for REG_ICASE backrefs, so it should be cheap.  Only equal/unequal
 * is reported.  The length is exact: embedded NULs do not end the
 * comparison.
 */
static int				/* 0 for equal, nonzero for unequal */
casecmp(const chr *x, const chr *y,			/* strings to compare */
		size_t len)				/* exact length of comparison */
{
    size_t i;

    for (i = 0; i < len; i++) {
	/* identical chrs need no case folding */
	if (x[i] == y[i])
	    continue;
	if (pg_tolower(x[i]) != pg_tolower(y[i]))
	    return 1;
    }
    return 0;
}
Replace regular expression package with Henry Spencer's latest version (extracted from Tcl 8.4.1 release, as Henry still hasn't got round to making it a separate library). This solves a performance problem for multibyte, as well as upgrading our regexp support to match recent Tcl and nearly match recent Perl. 2003-02-05 18:41:33 +01:00			`/*`
			`* regc_locale.c --`
			`*`
			`* This file contains locale-specific regexp routines.`
			`* This file is #included by regcomp.c.`
			`*`
			`* Copyright (c) 1998 by Scriptics Corporation.`
			`*`
			`* This software is copyrighted by the Regents of the University of`
			`* California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState`
			`* Corporation and other parties. The following terms apply to all files`
			`* associated with the software unless explicitly disclaimed in`
			`* individual files.`
			`*`
			`* The authors hereby grant permission to use, copy, modify, distribute,`
			`* and license this software and its documentation for any purpose, provided`
			`* that existing copyright notices are retained in all copies and that this`
			`* notice is included verbatim in any distributions. No written agreement,`
			`* license, or royalty fee is required for any of the authorized uses.`
			`* Modifications to this software may be copyrighted by their authors`
			`* and need not follow the licensing terms described here, provided that`
			`* the new terms are clearly indicated on the first page of each file where`
			`* they apply.`
			`*`
			`* IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY`
			`* FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES`
			`* ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY`
			`* DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE`
			`* POSSIBILITY OF SUCH DAMAGE.`
			`*`
			`* THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,`
			`* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,`
			`* FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE`
			`* IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE`
			`* NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR`
			`* MODIFICATIONS.`
			`*`
			`* GOVERNMENT USE: If you are acquiring this software on behalf of the`
			`* U.S. government, the Government shall have only "Restricted Rights"`
			`* in the software and related documentation as defined in the Federal`
			`* Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you`
			`* are acquiring the software on behalf of the Department of Defense, the`
			`* software shall be classified as "Commercial Computer Software" and the`
			`* Government shall have only "Restricted Rights" as defined in Clause`
			`* 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the`
			`* authors grant the U.S. Government and others acting in its behalf`
			`* permission to use and distribute the software in accordance with the`
			`* terms specified in this license.`
			`*`
			`* $Header: /cvsroot/pgsql/src/backend/regex/regc_locale.c,v 1.1 2003/02/05 17:41:32 tgl Exp $`
			`*/`

			`/* ASCII character-name table */`

			`static struct cname {`
			`char *name;`
			`char code;`
			`} cnames[] = {`
			`{"NUL", '\0'},`
			`{"SOH", '\001'},`
			`{"STX", '\002'},`
			`{"ETX", '\003'},`
			`{"EOT", '\004'},`
			`{"ENQ", '\005'},`
			`{"ACK", '\006'},`
			`{"BEL", '\007'},`
			`{"alert", '\007'},`
			`{"BS", '\010'},`
			`{"backspace", '\b'},`
			`{"HT", '\011'},`
			`{"tab", '\t'},`
			`{"LF", '\012'},`
			`{"newline", '\n'},`
			`{"VT", '\013'},`
			`{"vertical-tab", '\v'},`
			`{"FF", '\014'},`
			`{"form-feed", '\f'},`
			`{"CR", '\015'},`
			`{"carriage-return", '\r'},`
			`{"SO", '\016'},`
			`{"SI", '\017'},`
			`{"DLE", '\020'},`
			`{"DC1", '\021'},`
			`{"DC2", '\022'},`
			`{"DC3", '\023'},`
			`{"DC4", '\024'},`
			`{"NAK", '\025'},`
			`{"SYN", '\026'},`
			`{"ETB", '\027'},`
			`{"CAN", '\030'},`
			`{"EM", '\031'},`
			`{"SUB", '\032'},`
			`{"ESC", '\033'},`
			`{"IS4", '\034'},`
			`{"FS", '\034'},`
			`{"IS3", '\035'},`
			`{"GS", '\035'},`
			`{"IS2", '\036'},`
			`{"RS", '\036'},`
			`{"IS1", '\037'},`
			`{"US", '\037'},`
			`{"space", ' '},`
			`{"exclamation-mark",'!'},`
			`{"quotation-mark", '"'},`
			`{"number-sign", '#'},`
			`{"dollar-sign", '$'},`
			`{"percent-sign", '%'},`
			`{"ampersand", '&'},`
			`{"apostrophe", '\''},`
			`{"left-parenthesis",'('},`
			`{"right-parenthesis", ')'},`
			`{"asterisk", '*'},`
			`{"plus-sign", '+'},`
			`{"comma", ','},`
			`{"hyphen", '-'},`
			`{"hyphen-minus", '-'},`
			`{"period", '.'},`
			`{"full-stop", '.'},`
			`{"slash", '/'},`
			`{"solidus", '/'},`
			`{"zero", '0'},`
			`{"one", '1'},`
			`{"two", '2'},`
			`{"three", '3'},`
			`{"four", '4'},`
			`{"five", '5'},`
			`{"six", '6'},`
			`{"seven", '7'},`
			`{"eight", '8'},`
			`{"nine", '9'},`
			`{"colon", ':'},`
			`{"semicolon", ';'},`
			`{"less-than-sign", '<'},`
			`{"equals-sign", '='},`
			`{"greater-than-sign", '>'},`
			`{"question-mark", '?'},`
			`{"commercial-at", '@'},`
			`{"left-square-bracket", '['},`
			`{"backslash", '\\'},`
			`{"reverse-solidus", '\\'},`
			`{"right-square-bracket", ']'},`
			`{"circumflex", '^'},`
			`{"circumflex-accent", '^'},`
			`{"underscore", '_'},`
			`{"low-line", '_'},`
			{"grave-accent", '`'},
			`{"left-brace", '{'},`
			`{"left-curly-bracket", '{'},`
			`{"vertical-line", '\|'},`
			`{"right-brace", '}'},`
			`{"right-curly-bracket", '}'},`
			`{"tilde", '~'},`
			`{"DEL", '\177'},`
			`{NULL, 0}`
			`};`

			`/*`
			`* some ctype functions with non-ascii-char guard`
			`*/`
			`static int`
			`pg_isdigit(pg_wchar c)`
			`{`
			`return (c >= 0 && c <= UCHAR_MAX && isdigit((unsigned char) c));`
			`}`

			`static int`
			`pg_isalpha(pg_wchar c)`
			`{`
			`return (c >= 0 && c <= UCHAR_MAX && isalpha((unsigned char) c));`
			`}`

			`static int`
			`pg_isalnum(pg_wchar c)`
			`{`
			`return (c >= 0 && c <= UCHAR_MAX && isalnum((unsigned char) c));`
			`}`

			`static int`
			`pg_isupper(pg_wchar c)`
			`{`
			`return (c >= 0 && c <= UCHAR_MAX && isupper((unsigned char) c));`
			`}`

			`static int`
			`pg_islower(pg_wchar c)`
			`{`
			`return (c >= 0 && c <= UCHAR_MAX && islower((unsigned char) c));`
			`}`

			`static int`
			`pg_isgraph(pg_wchar c)`
			`{`
			`return (c >= 0 && c <= UCHAR_MAX && isgraph((unsigned char) c));`
			`}`

			`static int`
			`pg_ispunct(pg_wchar c)`
			`{`
			`return (c >= 0 && c <= UCHAR_MAX && ispunct((unsigned char) c));`
			`}`

			`static int`
			`pg_isspace(pg_wchar c)`
			`{`
			`return (c >= 0 && c <= UCHAR_MAX && isspace((unsigned char) c));`
			`}`

			`static pg_wchar`
			`pg_toupper(pg_wchar c)`
			`{`
			`if (c >= 0 && c <= UCHAR_MAX)`
			`return toupper((unsigned char) c);`
			`return c;`
			`}`

			`static pg_wchar`
			`pg_tolower(pg_wchar c)`
			`{`
			`if (c >= 0 && c <= UCHAR_MAX)`
			`return tolower((unsigned char) c);`
			`return c;`
			`}`


			`/*`
			`* nmcces - how many distinct MCCEs are there?`
			`*/`
			`static int`
			`nmcces(struct vars *v)`
			`{`
			`/*`
			`* No multi-character collating elements defined at the moment.`
			`*/`
			`return 0;`
			`}`

			`/*`
			`* nleaders - how many chrs can be first chrs of MCCEs?`
			`*/`
			`static int`
			`nleaders(struct vars *v)`
			`{`
			`return 0;`
			`}`

			`/*`
			`* allmcces - return a cvec with all the MCCEs of the locale`
			`*/`
			`static struct cvec *`
			`allmcces(struct vars v, / context */`
			`struct cvec cv) / this is supposed to have enough room */`
			`{`
			`return clearcvec(cv);`
			`}`

			`/*`
			`* element - map collating-element name to celt`
			`*/`
			`static celt`
			`element(struct vars v, / context */`
			`chr startp, / points to start of name */`
			`chr endp) / points just past end of name */`
			`{`
			`struct cname *cn;`
			`size_t len;`

			`/* generic: one-chr names stand for themselves */`
			`assert(startp < endp);`
			`len = endp - startp;`
			`if (len == 1) {`
			`return *startp;`
			`}`

			`NOTE(REG_ULOCALE);`

			`/* search table */`
			`for (cn=cnames; cn->name!=NULL; cn++) {`
			`if (strlen(cn->name)==len &&`
			`pg_char_and_wchar_strncmp(cn->name, startp, len)==0) {`
			`break; /* NOTE BREAK OUT */`
			`}`
			`}`
			`if (cn->name != NULL) {`
			`return CHR(cn->code);`
			`}`

			`/* couldn't find it */`
			`ERR(REG_ECOLLATE);`
			`return 0;`
			`}`

			`/*`
			`* range - supply cvec for a range, including legality check`
			`*/`
			`static struct cvec *`
			`range(struct vars v, / context */`
			`celt a, /* range start */`
			`celt b, /* range end, might equal a */`
			`int cases) /* case-independent? */`
			`{`
			`int nchrs;`
			`struct cvec *cv;`
			`celt c, lc, uc;`

			`if (a != b && !before(a, b)) {`
			`ERR(REG_ERANGE);`
			`return NULL;`
			`}`

			`if (!cases) { /* easy version */`
			`cv = getcvec(v, 0, 1, 0);`
			`NOERRN();`
			`addrange(cv, a, b);`
			`return cv;`
			`}`

			`/*`
			`* When case-independent, it's hard to decide when cvec ranges are`
			`* usable, so for now at least, we won't try. We allocate enough`
			`* space for two case variants plus a little extra for the two`
			`* title case variants.`
			`*/`

			`nchrs = (b - a + 1)*2 + 4;`

			`cv = getcvec(v, nchrs, 0, 0);`
			`NOERRN();`

			`for (c=a; c<=b; c++) {`
			`addchr(cv, c);`
			`lc = pg_tolower((chr)c);`
			`if (c != lc) {`
			`addchr(cv, lc);`
			`}`
			`uc = pg_toupper((chr)c);`
			`if (c != uc) {`
			`addchr(cv, uc);`
			`}`
			`}`

			`return cv;`
			`}`

			`/*`
			`* before - is celt x before celt y, for purposes of range legality?`
			`*/`
			`static int /* predicate */`
			`before(celt x, celt y)`
			`{`
			`/* trivial because no MCCEs */`
			`if (x < y) {`
			`return 1;`
			`}`
			`return 0;`
			`}`

			`/*`
			`* eclass - supply cvec for an equivalence class`
			`* Must include case counterparts on request.`
			`*/`
			`static struct cvec *`
			`eclass(struct vars v, / context */`
			`celt c, /* Collating element representing`
			`* the equivalence class. */`
			`int cases) /* all cases? */`
			`{`
			`struct cvec *cv;`

			`/* crude fake equivalence class for testing */`
			`if ((v->cflags&REG_FAKE) && c == 'x') {`
			`cv = getcvec(v, 4, 0, 0);`
			`addchr(cv, (chr)'x');`
			`addchr(cv, (chr)'y');`
			`if (cases) {`
			`addchr(cv, (chr)'X');`
			`addchr(cv, (chr)'Y');`
			`}`
			`return cv;`
			`}`

			`/* otherwise, none */`
			`if (cases) {`
			`return allcases(v, c);`
			`}`
			`cv = getcvec(v, 1, 0, 0);`
			`assert(cv != NULL);`
			`addchr(cv, (chr)c);`
			`return cv;`
			`}`

			`/*`
			`* cclass - supply cvec for a character class`
			`*`
			`* Must include case counterparts on request.`
			`*/`
			`static struct cvec *`
			`cclass(struct vars v, / context */`
			`chr startp, / where the name starts */`
			`chr endp, / just past the end of the name */`
			`int cases) /* case-independent? */`
			`{`
			`size_t len;`
			`struct cvec *cv = NULL;`
			`char **namePtr;`
			`int i, index;`

			`/*`
			`* The following arrays define the valid character class names.`
			`*/`

			`static char *classNames[] = {`
			`"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",`
			`"lower", "print", "punct", "space", "upper", "xdigit", NULL`
			`};`

			`enum classes {`
			`CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,`
			`CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT`
			`};`

			`/*`
			`* Map the name to the corresponding enumerated value.`
			`*/`
			`len = endp - startp;`
			`index = -1;`
			`for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {`
			`if (strlen(*namePtr) == len &&`
			`pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0) {`
			`index = i;`
			`break;`
			`}`
			`}`
			`if (index == -1) {`
			`ERR(REG_ECTYPE);`
			`return NULL;`
			`}`

			`/*`
			`* Remap lower and upper to alpha if the match is case insensitive.`
			`*/`

			`if (cases &&`
			`((enum classes) index == CC_LOWER \|\|`
			`(enum classes) index == CC_UPPER))`
			`index = (int) CC_ALPHA;`

			`/*`
			`* Now compute the character class contents.`
			`*`
			`* For the moment, assume that only char codes < 256 can be in these`
			`* classes.`
			`*/`

			`switch((enum classes) index) {`
			`case CC_PRINT:`
			`case CC_ALNUM:`
			`cv = getcvec(v, UCHAR_MAX, 1, 0);`
			`if (cv) {`
			`for (i=0 ; i<= UCHAR_MAX ; i++) {`
			`if (pg_isalpha((chr) i))`
			`addchr(cv, (chr) i);`
			`}`
			`addrange(cv, (chr) '0', (chr) '9');`
			`}`
			`break;`
			`case CC_ALPHA:`
			`cv = getcvec(v, UCHAR_MAX, 0, 0);`
			`if (cv) {`
			`for (i=0 ; i<= UCHAR_MAX ; i++) {`
			`if (pg_isalpha((chr) i))`
			`addchr(cv, (chr) i);`
			`}`
			`}`
			`break;`
			`case CC_ASCII:`
			`cv = getcvec(v, 0, 1, 0);`
			`if (cv) {`
			`addrange(cv, 0, 0x7f);`
			`}`
			`break;`
			`case CC_BLANK:`
			`cv = getcvec(v, 2, 0, 0);`
			`addchr(cv, '\t');`
			`addchr(cv, ' ');`
			`break;`
			`case CC_CNTRL:`
			`cv = getcvec(v, 0, 2, 0);`
			`addrange(cv, 0x0, 0x1f);`
			`addrange(cv, 0x7f, 0x9f);`
			`break;`
			`case CC_DIGIT:`
			`cv = getcvec(v, 0, 1, 0);`
			`if (cv) {`
			`addrange(cv, (chr) '0', (chr) '9');`
			`}`
			`break;`
			`case CC_PUNCT:`
			`cv = getcvec(v, UCHAR_MAX, 0, 0);`
			`if (cv) {`
			`for (i=0 ; i<= UCHAR_MAX ; i++) {`
			`if (pg_ispunct((chr) i))`
			`addchr(cv, (chr) i);`
			`}`
			`}`
			`break;`
			`case CC_XDIGIT:`
			`cv = getcvec(v, 0, 3, 0);`
			`if (cv) {`
			`addrange(cv, '0', '9');`
			`addrange(cv, 'a', 'f');`
			`addrange(cv, 'A', 'F');`
			`}`
			`break;`
			`case CC_SPACE:`
			`cv = getcvec(v, UCHAR_MAX, 0, 0);`
			`if (cv) {`
			`for (i=0 ; i<= UCHAR_MAX ; i++) {`
			`if (pg_isspace((chr) i))`
			`addchr(cv, (chr) i);`
			`}`
			`}`
			`break;`
			`case CC_LOWER:`
			`cv = getcvec(v, UCHAR_MAX, 0, 0);`
			`if (cv) {`
			`for (i=0 ; i<= UCHAR_MAX ; i++) {`
			`if (pg_islower((chr) i))`
			`addchr(cv, (chr) i);`
			`}`
			`}`
			`break;`
			`case CC_UPPER:`
			`cv = getcvec(v, UCHAR_MAX, 0, 0);`
			`if (cv) {`
			`for (i=0 ; i<= UCHAR_MAX ; i++) {`
			`if (pg_isupper((chr) i))`
			`addchr(cv, (chr) i);`
			`}`
			`}`
			`break;`
			`case CC_GRAPH:`
			`cv = getcvec(v, UCHAR_MAX, 0, 0);`
			`if (cv) {`
			`for (i=0 ; i<= UCHAR_MAX ; i++) {`
			`if (pg_isgraph((chr) i))`
			`addchr(cv, (chr) i);`
			`}`
			`}`
			`break;`
			`}`
			`if (cv == NULL) {`
			`ERR(REG_ESPACE);`
			`}`
			`return cv;`
			`}`

			`/*`
			`* allcases - supply cvec for all case counterparts of a chr (including itself)`
			`*`
			`* This is a shortcut, preferably an efficient one, for simple characters;`
			`* messy cases are done via range().`
			`*/`
			`static struct cvec *`
			`allcases(struct vars v, / context */`
			`chr pc) /* character to get case equivs of */`
			`{`
			`struct cvec *cv;`
			`chr c = (chr)pc;`
			`chr lc, uc;`

			`lc = pg_tolower((chr)c);`
			`uc = pg_toupper((chr)c);`

			`cv = getcvec(v, 2, 0, 0);`
			`addchr(cv, lc);`
			`if (lc != uc) {`
			`addchr(cv, uc);`
			`}`
			`return cv;`
			`}`

			`/*`
			`* cmp - chr-substring compare`
			`*`
			`* Backrefs need this. It should preferably be efficient.`
			`* Note that it does not need to report anything except equal/unequal.`
			`* Note also that the length is exact, and the comparison should not`
			`* stop at embedded NULs!`
			`*/`
			`static int /* 0 for equal, nonzero for unequal */`
			`cmp(const chr x, const chr y, /* strings to compare */`
			`size_t len) /* exact length of comparison */`
			`{`
			`return memcmp(VS(x), VS(y), len*sizeof(chr));`
			`}`

			`/*`
			`* casecmp - case-independent chr-substring compare`
			`*`
			`* REG_ICASE backrefs need this. It should preferably be efficient.`
			`* Note that it does not need to report anything except equal/unequal.`
			`* Note also that the length is exact, and the comparison should not`
			`* stop at embedded NULs!`
			`*/`
			`static int /* 0 for equal, nonzero for unequal */`
			`casecmp(const chr x, const chr y, /* strings to compare */`
			`size_t len) /* exact length of comparison */`
			`{`
			`for (; len > 0; len--, x++, y++) {`
			`if ((x!=y) && (pg_tolower(x) != pg_tolower(y))) {`
			`return 1;`
			`}`
			`}`
			`return 0;`
			`}`