Sync our regex code with upstream changes since last time we did this, which
was Tcl 8.4.8.  The main changes are to remove the never-fully-implemented
code for multi-character collating elements, and to const-ify some stuff a
bit more fully.  In combination with the recent security patch, this commit
brings us into line with Tcl 8.5.0.

Note that I didn't make any effort to duplicate a lot of cosmetic changes
that they made to bring their copy into line with their own style
guidelines, such as adding braces around single-line IF bodies.  Most of
those we either had done already (such as ANSI-fication of function headers)
or there is no point because pgindent would undo the change anyway.
This commit is contained in:
Tom Lane 2008-02-14 17:33:37 +00:00
parent 423abf4d6a
commit df1e965e12
9 changed files with 165 additions and 546 deletions

View File

@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regc_color.c,v 1.8 2008/01/03 20:47:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/regex/regc_color.c,v 1.9 2008/02/14 17:33:37 tgl Exp $
*
*
* Note that there are some incestuous relationships between this code and
@ -222,7 +222,6 @@ static color /* COLORLESS for error */
newcolor(struct colormap * cm)
{
struct colordesc *cd;
struct colordesc *new;
size_t n;
if (CISERR())
@ -245,24 +244,25 @@ newcolor(struct colormap * cm)
else
{
/* oops, must allocate more */
struct colordesc *newCd;
n = cm->ncds * 2;
if (cm->cd == cm->cdspace)
{
new = (struct colordesc *) MALLOC(n *
sizeof(struct colordesc));
if (new != NULL)
memcpy(VS(new), VS(cm->cdspace), cm->ncds *
newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
if (newCd != NULL)
memcpy(VS(newCd), VS(cm->cdspace), cm->ncds *
sizeof(struct colordesc));
}
else
new = (struct colordesc *) REALLOC(cm->cd,
n * sizeof(struct colordesc));
if (new == NULL)
newCd = (struct colordesc *)
REALLOC(cm->cd, n * sizeof(struct colordesc));
if (newCd == NULL)
{
CERR(REG_ESPACE);
return COLORLESS;
}
cm->cd = new;
cm->cd = newCd;
cm->ncds = n;
assert(cm->max < cm->ncds - 1);
cm->max++;
@ -634,21 +634,6 @@ uncolorchain(struct colormap * cm,
a->colorchainRev = NULL;
}
/*
* singleton - is this character in its own color?
*/
static int /* predicate */
singleton(struct colormap * cm,
chr c)
{
color co; /* color of c */
co = GETCOLOR(cm, c);
if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB)
return 1;
return 0;
}
/*
* rainbow - add arcs of all full colors (but one) between specified states
*/

View File

@ -28,33 +28,31 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regc_cvec.c,v 1.5 2005/10/15 02:49:24 momjian Exp $
* $PostgreSQL: pgsql/src/backend/regex/regc_cvec.c,v 1.6 2008/02/14 17:33:37 tgl Exp $
*
*/
/*
* Notes:
* Only (selected) functions in _this_ file should treat chr* as non-constant.
*/
/*
* newcvec - allocate a new cvec
*/
static struct cvec *
newcvec(int nchrs, /* to hold this many chrs... */
int nranges, /* ... and this many ranges... */
int nmcces) /* ... and this many MCCEs */
int nranges) /* ... and this many ranges */
{
size_t n;
size_t nc;
struct cvec *cv;
size_t nc = (size_t) nchrs + (size_t) nranges * 2;
size_t n = sizeof(struct cvec) + nc * sizeof(chr);
struct cvec *cv = (struct cvec *) MALLOC(n);
nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2;
n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *)
+ nc * sizeof(chr);
cv = (struct cvec *) MALLOC(n);
if (cv == NULL)
return NULL;
cv->chrspace = nchrs;
cv->chrs = (chr *) &cv->mcces[nmcces]; /* chrs just after MCCE ptrs */
cv->mccespace = nmcces;
cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1);
cv->chrs = (chr *) (((char *) cv) + sizeof(struct cvec));
cv->ranges = cv->chrs + nchrs;
cv->rangespace = nranges;
return clearcvec(cv);
}
@ -66,17 +64,9 @@ newcvec(int nchrs, /* to hold this many chrs... */
static struct cvec *
clearcvec(struct cvec * cv)
{
int i;
assert(cv != NULL);
cv->nchrs = 0;
assert(cv->chrs == (chr *) &cv->mcces[cv->mccespace]);
cv->nmcces = 0;
cv->nmccechrs = 0;
cv->nranges = 0;
for (i = 0; i < cv->mccespace; i++)
cv->mcces[i] = NULL;
return cv;
}
@ -87,7 +77,6 @@ static void
addchr(struct cvec * cv, /* character vector */
chr c) /* character to add */
{
assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
cv->chrs[cv->nchrs++] = (chr) c;
}
@ -105,73 +94,21 @@ addrange(struct cvec * cv, /* character vector */
cv->nranges++;
}
/*
* addmcce - add an MCCE to a cvec
*/
static void
addmcce(struct cvec * cv, /* character vector */
chr *startp, /* beginning of text */
chr *endp) /* just past end of text */
{
int len;
int i;
chr *s;
chr *d;
if (startp == NULL && endp == NULL)
return;
len = endp - startp;
assert(len > 0);
assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
assert(cv->nmcces < cv->mccespace);
d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
cv->mcces[cv->nmcces++] = d;
for (s = startp, i = len; i > 0; s++, i--)
*d++ = *s;
*d++ = 0; /* endmarker */
assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
cv->nmccechrs += len + 1;
}
/*
* haschr - does a cvec contain this chr?
*/
static int /* predicate */
haschr(struct cvec * cv, /* character vector */
chr c) /* character to test for */
{
int i;
chr *p;
for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
{
if (*p == c)
return 1;
}
for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
{
if ((*p <= c) && (c <= *(p + 1)))
return 1;
}
return 0;
}
/*
* getcvec - get a cvec, remembering it as v->cv
*/
static struct cvec *
getcvec(struct vars * v, /* context */
int nchrs, /* to hold this many chrs... */
int nranges, /* ... and this many ranges... */
int nmcces) /* ... and this many MCCEs */
int nranges) /* ... and this many ranges */
{
if (v->cv != NULL && nchrs <= v->cv->chrspace &&
nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace)
nranges <= v->cv->rangespace)
return clearcvec(v->cv);
if (v->cv != NULL)
freecvec(v->cv);
v->cv = newcvec(nchrs, nranges, nmcces);
v->cv = newcvec(nchrs, nranges);
if (v->cv == NULL)
ERR(REG_ESPACE);

View File

@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regc_lex.c,v 1.7 2008/01/03 20:47:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/regex/regc_lex.c,v 1.8 2008/02/14 17:33:37 tgl Exp $
*
*/
@ -201,8 +201,8 @@ prefixes(struct vars * v)
*/
static void
lexnest(struct vars * v,
chr *beginp, /* start of interpolation */
chr *endp) /* one past end of interpolation */
const chr *beginp, /* start of interpolation */
const chr *endp) /* one past end of interpolation */
{
assert(v->savenow == NULL); /* only one level of nesting */
v->savenow = v->now;
@ -214,47 +214,47 @@ lexnest(struct vars * v,
/*
* string constants to interpolate as expansions of things like \d
*/
static chr backd[] = { /* \d */
static const chr backd[] = { /* \d */
CHR('['), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']'), CHR(']')
};
static chr backD[] = { /* \D */
static const chr backD[] = { /* \D */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']'), CHR(']')
};
static chr brbackd[] = { /* \d within brackets */
static const chr brbackd[] = { /* \d within brackets */
CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']')
};
static chr backs[] = { /* \s */
static const chr backs[] = { /* \s */
CHR('['), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']'), CHR(']')
};
static chr backS[] = { /* \S */
static const chr backS[] = { /* \S */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']'), CHR(']')
};
static chr brbacks[] = { /* \s within brackets */
static const chr brbacks[] = { /* \s within brackets */
CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']')
};
static chr backw[] = { /* \w */
static const chr backw[] = { /* \w */
CHR('['), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
static chr backW[] = { /* \W */
static const chr backW[] = { /* \W */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
static chr brbackw[] = { /* \w within brackets */
static const chr brbackw[] = { /* \w within brackets */
CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_')
@ -722,7 +722,7 @@ lexescape(struct vars * v)
static chr esc[] = {
CHR('E'), CHR('S'), CHR('C')
};
chr *save;
const chr *save;
assert(v->cflags & REG_ADVF);
@ -1080,7 +1080,7 @@ brenext(struct vars * v,
static void
skip(struct vars * v)
{
chr *start = v->now;
const chr *start = v->now;
assert(v->cflags & REG_EXPANDED);
@ -1119,8 +1119,8 @@ newline(void)
*/
static chr
chrnamed(struct vars * v,
chr *startp, /* start of name */
chr *endp, /* just past end of name */
const chr *startp, /* start of name */
const chr *endp, /* just past end of name */
chr lastresort) /* what to return if name lookup fails */
{
celt c;

View File

@ -47,15 +47,15 @@
* permission to use and distribute the software in accordance with the
* terms specified in this license.
*
* $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.8 2005/11/22 18:17:19 momjian Exp $
* $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.9 2008/02/14 17:33:37 tgl Exp $
*/
/* ASCII character-name table */
static struct cname
static const struct cname
{
char *name;
char code;
const char *name;
const char code;
} cnames[] =
{
@ -423,46 +423,15 @@ pg_wc_tolower(pg_wchar c)
}
/*
* nmcces - how many distinct MCCEs are there?
*/
static int
nmcces(struct vars * v)
{
/*
* No multi-character collating elements defined at the moment.
*/
return 0;
}
/*
* nleaders - how many chrs can be first chrs of MCCEs?
*/
static int
nleaders(struct vars * v)
{
return 0;
}
/*
* allmcces - return a cvec with all the MCCEs of the locale
*/
static struct cvec *
allmcces(struct vars * v, /* context */
struct cvec * cv) /* this is supposed to have enough room */
{
return clearcvec(cv);
}
/*
* element - map collating-element name to celt
*/
static celt
element(struct vars * v, /* context */
chr *startp, /* points to start of name */
chr *endp) /* points just past end of name */
const chr *startp, /* points to start of name */
const chr *endp) /* points just past end of name */
{
struct cname *cn;
const struct cname *cn;
size_t len;
/* generic: one-chr names stand for themselves */
@ -513,7 +482,7 @@ range(struct vars * v, /* context */
if (!cases)
{ /* easy version */
cv = getcvec(v, 0, 1, 0);
cv = getcvec(v, 0, 1);
NOERRN();
addrange(cv, a, b);
return cv;
@ -527,7 +496,7 @@ range(struct vars * v, /* context */
nchrs = (b - a + 1) * 2 + 4;
cv = getcvec(v, nchrs, 0, 0);
cv = getcvec(v, nchrs, 0);
NOERRN();
for (c = a; c <= b; c++)
@ -550,7 +519,6 @@ range(struct vars * v, /* context */
static int /* predicate */
before(celt x, celt y)
{
/* trivial because no MCCEs */
if (x < y)
return 1;
return 0;
@ -571,7 +539,7 @@ eclass(struct vars * v, /* context */
/* crude fake equivalence class for testing */
if ((v->cflags & REG_FAKE) && c == 'x')
{
cv = getcvec(v, 4, 0, 0);
cv = getcvec(v, 4, 0);
addchr(cv, (chr) 'x');
addchr(cv, (chr) 'y');
if (cases)
@ -585,7 +553,7 @@ eclass(struct vars * v, /* context */
/* otherwise, none */
if (cases)
return allcases(v, c);
cv = getcvec(v, 1, 0, 0);
cv = getcvec(v, 1, 0);
assert(cv != NULL);
addchr(cv, (chr) c);
return cv;
@ -598,13 +566,13 @@ eclass(struct vars * v, /* context */
*/
static struct cvec *
cclass(struct vars * v, /* context */
chr *startp, /* where the name starts */
chr *endp, /* just past the end of the name */
const chr *startp, /* where the name starts */
const chr *endp, /* just past the end of the name */
int cases) /* case-independent? */
{
size_t len;
struct cvec *cv = NULL;
char **namePtr;
const char **namePtr;
int i,
index;
@ -612,7 +580,7 @@ cclass(struct vars * v, /* context */
* The following arrays define the valid character class names.
*/
static char *classNames[] = {
static const char *classNames[] = {
"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
"lower", "print", "punct", "space", "upper", "xdigit", NULL
};
@ -662,7 +630,7 @@ cclass(struct vars * v, /* context */
switch ((enum classes) index)
{
case CC_PRINT:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@ -673,7 +641,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_ALNUM:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@ -684,7 +652,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_ALPHA:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@ -695,27 +663,27 @@ cclass(struct vars * v, /* context */
}
break;
case CC_ASCII:
cv = getcvec(v, 0, 1, 0);
cv = getcvec(v, 0, 1);
if (cv)
addrange(cv, 0, 0x7f);
break;
case CC_BLANK:
cv = getcvec(v, 2, 0, 0);
cv = getcvec(v, 2, 0);
addchr(cv, '\t');
addchr(cv, ' ');
break;
case CC_CNTRL:
cv = getcvec(v, 0, 2, 0);
cv = getcvec(v, 0, 2);
addrange(cv, 0x0, 0x1f);
addrange(cv, 0x7f, 0x9f);
break;
case CC_DIGIT:
cv = getcvec(v, 0, 1, 0);
cv = getcvec(v, 0, 1);
if (cv)
addrange(cv, (chr) '0', (chr) '9');
break;
case CC_PUNCT:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@ -726,7 +694,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_XDIGIT:
cv = getcvec(v, 0, 3, 0);
cv = getcvec(v, 0, 3);
if (cv)
{
addrange(cv, '0', '9');
@ -735,7 +703,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_SPACE:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@ -746,7 +714,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_LOWER:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@ -757,7 +725,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_UPPER:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@ -768,7 +736,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_GRAPH:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@ -802,7 +770,7 @@ allcases(struct vars * v, /* context */
lc = pg_wc_tolower((chr) c);
uc = pg_wc_toupper((chr) c);
cv = getcvec(v, 2, 0, 0);
cv = getcvec(v, 2, 0);
addchr(cv, lc);
if (lc != uc)
addchr(cv, uc);

View File

@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regc_nfa.c,v 1.5 2008/01/03 20:47:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/regex/regc_nfa.c,v 1.6 2008/02/14 17:33:37 tgl Exp $
*
*
* One or two things that technically ought to be in here
@ -349,8 +349,6 @@ newarc(struct nfa * nfa,
if (COLORED(a) && nfa->parent == NULL)
colorchain(nfa->cm, a);
return;
}
/*
@ -361,8 +359,6 @@ allocarc(struct nfa * nfa,
struct state * s)
{
struct arc *a;
struct arcbatch *new;
int i;
/* shortcut */
if (s->free == NULL && s->noas < ABSIZE)
@ -375,22 +371,25 @@ allocarc(struct nfa * nfa,
/* if none at hand, get more */
if (s->free == NULL)
{
new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
if (new == NULL)
struct arcbatch *newAb;
int i;
newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
if (newAb == NULL)
{
NERR(REG_ESPACE);
return NULL;
}
new->next = s->oas.next;
s->oas.next = new;
newAb->next = s->oas.next;
s->oas.next = newAb;
for (i = 0; i < ABSIZE; i++)
{
new->a[i].type = 0;
new->a[i].freechain = &new->a[i + 1];
newAb->a[i].type = 0;
newAb->a[i].freechain = &newAb->a[i + 1];
}
new->a[ABSIZE - 1].freechain = NULL;
s->free = &new->a[0];
newAb->a[ABSIZE - 1].freechain = NULL;
s->free = &newAb->a[0];
}
assert(s->free != NULL);
@ -495,20 +494,20 @@ cparc(struct nfa * nfa,
*/
static void
moveins(struct nfa * nfa,
struct state * old,
struct state * new)
struct state * oldState,
struct state * newState)
{
struct arc *a;
assert(old != new);
assert(oldState != newState);
while ((a = old->ins) != NULL)
while ((a = oldState->ins) != NULL)
{
cparc(nfa, a, a->from, new);
cparc(nfa, a, a->from, newState);
freearc(nfa, a);
}
assert(old->nins == 0);
assert(old->ins == NULL);
assert(oldState->nins == 0);
assert(oldState->ins == NULL);
}
/*
@ -516,15 +515,15 @@ moveins(struct nfa * nfa,
*/
static void
copyins(struct nfa * nfa,
struct state * old,
struct state * new)
struct state * oldState,
struct state * newState)
{
struct arc *a;
assert(old != new);
assert(oldState != newState);
for (a = old->ins; a != NULL; a = a->inchain)
cparc(nfa, a, a->from, new);
for (a = oldState->ins; a != NULL; a = a->inchain)
cparc(nfa, a, a->from, newState);
}
/*
@ -532,16 +531,16 @@ copyins(struct nfa * nfa,
*/
static void
moveouts(struct nfa * nfa,
struct state * old,
struct state * new)
struct state * oldState,
struct state * newState)
{
struct arc *a;
assert(old != new);
assert(oldState != newState);
while ((a = old->outs) != NULL)
while ((a = oldState->outs) != NULL)
{
cparc(nfa, a, new, a->to);
cparc(nfa, a, newState, a->to);
freearc(nfa, a);
}
}
@ -551,15 +550,15 @@ moveouts(struct nfa * nfa,
*/
static void
copyouts(struct nfa * nfa,
struct state * old,
struct state * new)
struct state * oldState,
struct state * newState)
{
struct arc *a;
assert(old != new);
assert(oldState != newState);
for (a = old->outs; a != NULL; a = a->outchain)
cparc(nfa, a, new, a->to);
for (a = oldState->outs; a != NULL; a = a->outchain)
cparc(nfa, a, newState, a->to);
}
/*

View File

@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regcomp.c,v 1.45 2007/10/06 16:05:54 tgl Exp $
* $PostgreSQL: pgsql/src/backend/regex/regcomp.c,v 1.46 2008/02/14 17:33:37 tgl Exp $
*
*/
@ -51,11 +51,9 @@ static void repeat(struct vars *, struct state *, struct state *, int, int);
static void bracket(struct vars *, struct state *, struct state *);
static void cbracket(struct vars *, struct state *, struct state *);
static void brackpart(struct vars *, struct state *, struct state *);
static chr *scanplain(struct vars *);
static void leaders(struct vars *, struct cvec *);
static const chr *scanplain(struct vars *);
static void onechr(struct vars *, chr, struct state *, struct state *);
static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
static celt nextleader(struct vars *, chr, chr);
static void wordchrs(struct vars *);
static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
static void freesubre(struct vars *, struct subre *);
@ -74,12 +72,12 @@ static void rfree(regex_t *);
static void dump(regex_t *, FILE *);
static void dumpst(struct subre *, FILE *, int);
static void stdump(struct subre *, FILE *, int);
static char *stid(struct subre *, char *, size_t);
static const char *stid(struct subre *, char *, size_t);
#endif
/* === regc_lex.c === */
static void lexstart(struct vars *);
static void prefixes(struct vars *);
static void lexnest(struct vars *, chr *, chr *);
static void lexnest(struct vars *, const chr *, const chr *);
static void lexword(struct vars *);
static int next(struct vars *);
static int lexescape(struct vars *);
@ -87,7 +85,7 @@ static chr lexdigits(struct vars *, int, int, int);
static int brenext(struct vars *, chr);
static void skip(struct vars *);
static chr newline(void);
static chr chrnamed(struct vars *, chr *, chr *, chr);
static chr chrnamed(struct vars *, const chr *, const chr *, chr);
/* === regc_color.c === */
static void initcm(struct vars *, struct colormap *);
@ -105,7 +103,6 @@ static void subblock(struct vars *, chr, struct state *, struct state *);
static void okcolors(struct nfa *, struct colormap *);
static void colorchain(struct colormap *, struct arc *);
static void uncolorchain(struct colormap *, struct arc *);
static int singleton(struct colormap *, chr c);
static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
@ -168,13 +165,11 @@ static void dumpcnfa(struct cnfa *, FILE *);
static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
#endif
/* === regc_cvec.c === */
static struct cvec *newcvec(int, int, int);
static struct cvec *newcvec(int, int);
static struct cvec *clearcvec(struct cvec *);
static void addchr(struct cvec *, chr);
static void addrange(struct cvec *, chr, chr);
static void addmcce(struct cvec *, chr *, chr *);
static int haschr(struct cvec *, chr);
static struct cvec *getcvec(struct vars *, int, int, int);
static struct cvec *getcvec(struct vars *, int, int);
static void freecvec(struct cvec *);
/* === regc_locale.c === */
@ -189,14 +184,11 @@ static int pg_wc_ispunct(pg_wchar c);
static int pg_wc_isspace(pg_wchar c);
static pg_wchar pg_wc_toupper(pg_wchar c);
static pg_wchar pg_wc_tolower(pg_wchar c);
static int nmcces(struct vars *);
static int nleaders(struct vars *);
static struct cvec *allmcces(struct vars *, struct cvec *);
static celt element(struct vars *, chr *, chr *);
static celt element(struct vars *, const chr *, const chr *);
static struct cvec *range(struct vars *, celt, celt, int);
static int before(celt, celt);
static struct cvec *eclass(struct vars *, celt, int);
static struct cvec *cclass(struct vars *, chr *, chr *, int);
static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
static struct cvec *allcases(struct vars *, chr);
static int cmp(const chr *, const chr *, size_t);
static int casecmp(const chr *, const chr *, size_t);
@ -206,10 +198,10 @@ static int casecmp(const chr *, const chr *, size_t);
struct vars
{
regex_t *re;
chr *now; /* scan pointer into string */
chr *stop; /* end of string */
chr *savenow; /* saved now and stop for "subroutine call" */
chr *savestop;
const chr *now; /* scan pointer into string */
const chr *stop; /* end of string */
const chr *savenow; /* saved now and stop for "subroutine call" */
const chr *savestop;
int err; /* error code (0 if none) */
int cflags; /* copy of compile flags */
int lasttype; /* type of previous token */
@ -230,10 +222,6 @@ struct vars
int ntree; /* number of tree nodes */
struct cvec *cv; /* interface cvec */
struct cvec *cv2; /* utility cvec */
struct cvec *mcces; /* collating-element information */
#define ISCELEADER(v,c) ((v)->mcces != NULL && haschr((v)->mcces, (c)))
struct state *mccepbegin; /* in nfa, start of MCCE prototypes */
struct state *mccepend; /* in nfa, end of MCCE prototypes */
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
};
@ -275,9 +263,8 @@ struct vars
#define PREFER 'P' /* length preference */
/* is an arc colored, and hence on a color chain? */
#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \
(a)->type == BEHIND)
#define COLORED(a) \
((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)
/* static function list */
@ -322,7 +309,7 @@ pg_regcomp(regex_t *re,
/* initial setup (after which freev() is callable) */
v->re = re;
v->now = (chr *) string;
v->now = string;
v->stop = v->now + len;
v->savenow = v->savestop = NULL;
v->err = 0;
@ -341,7 +328,6 @@ pg_regcomp(regex_t *re,
v->treefree = NULL;
v->cv = NULL;
v->cv2 = NULL;
v->mcces = NULL;
v->lacons = NULL;
v->nlacons = 0;
re->re_magic = REMAGIC;
@ -363,19 +349,9 @@ pg_regcomp(regex_t *re,
ZAPCNFA(g->search);
v->nfa = newnfa(v, v->cm, (struct nfa *) NULL);
CNOERR();
v->cv = newcvec(100, 20, 10);
v->cv = newcvec(100, 20);
if (v->cv == NULL)
return freev(v, REG_ESPACE);
i = nmcces(v);
if (i > 0)
{
v->mcces = newcvec(nleaders(v), 0, i);
CNOERR();
v->mcces = allmcces(v, v->mcces);
leaders(v, v->mcces);
addmcce(v->mcces, (chr *) NULL, (chr *) NULL); /* dummy */
}
CNOERR();
/* parsing */
lexstart(v); /* also handles prefixes */
@ -525,8 +501,6 @@ freev(struct vars * v,
freecvec(v->cv);
if (v->cv2 != NULL)
freecvec(v->cv2);
if (v->mcces != NULL)
freecvec(v->mcces);
if (v->lacons != NULL)
freelacons(v->lacons, v->nlacons);
ERR(err); /* nop if err==0 */
@ -583,15 +557,14 @@ makesearch(struct vars * v,
for (b = s->ins; b != NULL; b = b->inchain)
if (b->from != pre)
break;
if (b != NULL)
{ /* must be split */
if (s->tmp == NULL)
{ /* if not already in the list */
/* (fixes bugs 505048, 230589, */
/* 840258, 504785) */
s->tmp = slist;
slist = s;
}
if (b != NULL && s->tmp == NULL)
{
/*
* Must be split if not already in the list (fixes bugs 505048,
* 230589, 840258, 504785).
*/
s->tmp = slist;
slist = s;
}
}
@ -1338,13 +1311,6 @@ cbracket(struct vars * v,
{
struct state *left = newstate(v->nfa);
struct state *right = newstate(v->nfa);
struct state *s;
struct arc *a; /* arc from lp */
struct arc *ba; /* arc from left, from bracket() */
struct arc *pa; /* MCCE-prototype arc */
color co;
chr *p;
int i;
NOERR();
bracket(v, left, right);
@ -1354,65 +1320,13 @@ cbracket(struct vars * v,
assert(lp->nouts == 0); /* all outarcs will be ours */
/* easy part of complementing */
/*
* Easy part of complementing, and all there is to do since the MCCE code
* was removed.
*/
colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
NOERR();
if (v->mcces == NULL)
{ /* no MCCEs -- we're done */
dropstate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
return;
}
/* but complementing gets messy in the presence of MCCEs... */
NOTE(REG_ULOCALE);
for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--)
{
co = GETCOLOR(v->cm, *p);
a = findarc(lp, PLAIN, co);
ba = findarc(left, PLAIN, co);
if (ba == NULL)
{
assert(a != NULL);
freearc(v->nfa, a);
}
else
assert(a == NULL);
s = newstate(v->nfa);
NOERR();
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
pa = findarc(v->mccepbegin, PLAIN, co);
assert(pa != NULL);
if (ba == NULL)
{ /* easy case, need all of them */
cloneouts(v->nfa, pa->to, s, rp, PLAIN);
newarc(v->nfa, '$', 1, s, rp);
newarc(v->nfa, '$', 0, s, rp);
colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
}
else
{ /* must be selective */
if (findarc(ba->to, '$', 1) == NULL)
{
newarc(v->nfa, '$', 1, s, rp);
newarc(v->nfa, '$', 0, s, rp);
colorcomplement(v->nfa, v->cm, AHEAD, pa->to,
s, rp);
}
for (pa = pa->to->outs; pa != NULL; pa = pa->outchain)
if (findarc(ba->to, PLAIN, pa->co) == NULL)
newarc(v->nfa, PLAIN, pa->co, s, rp);
if (s->nouts == 0) /* limit of selectivity: none */
dropstate(v->nfa, s); /* frees arc too */
}
NOERR();
}
delsub(v->nfa, left, right);
assert(left->nouts == 0);
freestate(v->nfa, left);
dropstate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
}
@ -1428,8 +1342,8 @@ brackpart(struct vars * v,
celt startc;
celt endc;
struct cvec *cv;
chr *startp;
chr *endp;
const chr *startp;
const chr *endp;
chr c[1];
/* parse something, get rid of special cases, take shortcuts */
@ -1442,8 +1356,8 @@ brackpart(struct vars * v,
case PLAIN:
c[0] = v->nextvalue;
NEXT();
/* shortcut for ordinary chr (not range, not MCCE leader) */
if (!SEE(RANGE) && !ISCELEADER(v, c[0]))
/* shortcut for ordinary chr (not range) */
if (!SEE(RANGE))
{
onechr(v, c[0], lp, rp);
return;
@ -1533,10 +1447,10 @@ brackpart(struct vars * v,
* Certain bits of trickery in lex.c know that this code does not try
* to look past the final bracket of the [. etc.
*/
static chr * /* just after end of sequence */
static const chr * /* just after end of sequence */
scanplain(struct vars * v)
{
chr *endp;
const chr *endp;
assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
NEXT();
@ -1554,52 +1468,6 @@ scanplain(struct vars * v)
return endp;
}
/*
* leaders - process a cvec of collating elements to also include leaders
* Also gives all characters involved their own colors, which is almost
* certainly necessary, and sets up little disconnected subNFA.
*/
static void
leaders(struct vars * v,
struct cvec * cv)
{
int mcce;
chr *p;
chr leader;
struct state *s;
struct arc *a;
v->mccepbegin = newstate(v->nfa);
v->mccepend = newstate(v->nfa);
NOERR();
for (mcce = 0; mcce < cv->nmcces; mcce++)
{
p = cv->mcces[mcce];
leader = *p;
if (!haschr(cv, leader))
{
addchr(cv, leader);
s = newstate(v->nfa);
newarc(v->nfa, PLAIN, subcolor(v->cm, leader),
v->mccepbegin, s);
okcolors(v->nfa, v->cm);
}
else
{
a = findarc(v->mccepbegin, PLAIN,
GETCOLOR(v->cm, leader));
assert(a != NULL);
s = a->to;
assert(s != v->mccepend);
}
p++;
assert(*p != 0 && *(p + 1) == 0); /* only 2-char MCCEs for now */
newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
okcolors(v->nfa, v->cm);
}
}
/*
* onechr - fill in arcs for a plain character, and possible case complements
* This is mostly a shortcut for efficient handling of the common case.
@ -1622,7 +1490,6 @@ onechr(struct vars * v,
/*
* dovec - fill in arcs for each element of a cvec
* This one has to handle the messy cases, like MCCEs and MCCE leaders.
*/
static void
dovec(struct vars * v,
@ -1633,47 +1500,14 @@ dovec(struct vars * v,
chr ch,
from,
to;
celt ce;
chr *p;
const chr *p;
int i;
color co;
struct cvec *leads;
struct arc *a;
struct arc *pa; /* arc in prototype */
struct state *s;
struct state *ps; /* state in prototype */
/* need a place to store leaders, if any */
if (nmcces(v) > 0)
{
assert(v->mcces != NULL);
if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs)
{
if (v->cv2 != NULL)
free(v->cv2);
v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);
NOERR();
leads = v->cv2;
}
else
leads = clearcvec(v->cv2);
}
else
leads = NULL;
/* first, get the ordinary characters out of the way */
/* ordinary characters */
for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
{
ch = *p;
if (!ISCELEADER(v, ch))
newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
else
{
assert(singleton(v->cm, ch));
assert(leads != NULL);
if (!haschr(leads, ch))
addchr(leads, ch);
}
newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
}
/* and the ranges */
@ -1681,103 +1515,9 @@ dovec(struct vars * v,
{
from = *p;
to = *(p + 1);
while (from <= to && (ce = nextleader(v, from, to)) != NOCELT)
{
if (from < ce)
subrange(v, from, ce - 1, lp, rp);
assert(singleton(v->cm, ce));
assert(leads != NULL);
if (!haschr(leads, ce))
addchr(leads, ce);
from = ce + 1;
}
if (from <= to)
subrange(v, from, to, lp, rp);
}
if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
return;
/* deal with the MCCE leaders */
NOTE(REG_ULOCALE);
for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--)
{
co = GETCOLOR(v->cm, *p);
a = findarc(lp, PLAIN, co);
if (a != NULL)
s = a->to;
else
{
s = newstate(v->nfa);
NOERR();
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
}
pa = findarc(v->mccepbegin, PLAIN, co);
assert(pa != NULL);
ps = pa->to;
newarc(v->nfa, '$', 1, s, rp);
newarc(v->nfa, '$', 0, s, rp);
colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);
NOERR();
}
/* and the MCCEs */
for (i = 0; i < cv->nmcces; i++)
{
p = cv->mcces[i];
assert(singleton(v->cm, *p));
if (!singleton(v->cm, *p))
{
ERR(REG_ASSERT);
return;
}
ch = *p++;
co = GETCOLOR(v->cm, ch);
a = findarc(lp, PLAIN, co);
if (a != NULL)
s = a->to;
else
{
s = newstate(v->nfa);
NOERR();
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
}
assert(*p != 0); /* at least two chars */
assert(singleton(v->cm, *p));
ch = *p++;
co = GETCOLOR(v->cm, ch);
assert(*p == 0); /* and only two, for now */
newarc(v->nfa, PLAIN, co, s, rp);
NOERR();
}
}
/*
 * nextleader - find next MCCE leader within range
 *
 * Walks the characters stored in v->mcces and reports the smallest one that
 * lies in the closed interval [from, to].  Returns NOCELT when v->mcces is
 * NULL or when no stored character falls inside the interval.
 */
static celt						/* NOCELT means none */
nextleader(struct vars * v,
		   chr from,
		   chr to)
{
	celt		lowest = NOCELT;	/* smallest in-range leader seen so far */
	int			count;
	int			k;

	/* no leader set at all: nothing can match */
	if (v->mcces == NULL)
		return NOCELT;

	count = v->mcces->nchrs;
	for (k = 0; k < count; k++)
	{
		chr			c = v->mcces->chrs[k];

		/* ignore anything outside [from, to] */
		if (c < from || c > to)
			continue;

		/* keep the first hit, or any later hit that is smaller */
		if (lowest == NOCELT || c < lowest)
			lowest = c;
	}

	return lowest;
}
/*
@ -1825,9 +1565,8 @@ subre(struct vars * v,
struct state * begin,
struct state * end)
{
struct subre *ret;
struct subre *ret = v->treefree;
ret = v->treefree;
if (ret != NULL)
v->treefree = ret->left;
else
@ -1906,14 +1645,13 @@ static void
optst(struct vars * v,
struct subre * t)
{
if (t == NULL)
return;
/* recurse through children */
if (t->left != NULL)
optst(v, t->left);
if (t->right != NULL)
optst(v, t->right);
/*
* DGP (2007-11-13): I assume it was the programmer's intent to eventually
* come back and add code to optimize subRE trees, but the routine coded
* just spends effort traversing the tree and doing nothing. We can do
* nothing with less effort.
*/
return;
}
/*
@ -2207,8 +1945,8 @@ stdump(struct subre * t,
{
fprintf(f, "\n");
dumpcnfa(&t->cnfa, f);
fprintf(f, "\n");
}
fprintf(f, "\n");
if (t->left != NULL)
stdump(t->left, f, nfapresent);
if (t->right != NULL)
@ -2218,7 +1956,7 @@ stdump(struct subre * t,
/*
* stid - identify a subtree node for dumping
*/
static char * /* points to buf or constant string */
static const char * /* points to buf or constant string */
stid(struct subre * t,
char *buf,
size_t bufsize)

View File

@ -27,7 +27,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regerror.c,v 1.27 2003/11/29 19:51:55 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/regex/regerror.c,v 1.28 2008/02/14 17:33:37 tgl Exp $
*
*/
@ -40,8 +40,8 @@ static char unk[] = "*** unknown regex error code 0x%x ***";
static struct rerr
{
int code;
char *name;
char *explain;
const char *name;
const char *explain;
} rerrs[] =
{
@ -63,7 +63,7 @@ pg_regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */
size_t errbuf_size) /* available space in errbuf, can be 0 */
{
struct rerr *r;
char *msg;
const char *msg;
char convbuf[sizeof(unk) + 50]; /* 50 = plenty for int */
size_t len;
int icode;

View File

@ -25,7 +25,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/include/regex/regcustom.h,v 1.6 2007/10/06 16:01:51 tgl Exp $
* $PostgreSQL: pgsql/src/include/regex/regcustom.h,v 1.7 2008/02/14 17:33:37 tgl Exp $
*/
/* headers if any */
@ -47,9 +47,9 @@
/* internal character type and related */
typedef pg_wchar chr; /* the type itself */
typedef unsigned uchr; /* unsigned type that will hold a chr */
typedef int celt; /* type to hold chr, MCCE number, or NOCELT */
typedef int celt; /* type to hold chr, or NOCELT */
#define NOCELT (-1) /* celt value which is not valid chr or MCCE */
#define NOCELT (-1) /* celt value which is not valid chr */
#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr literal */
#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
#define CHRBITS 32 /* bits in a chr; must not use sizeof */

View File

@ -27,7 +27,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/include/regex/regguts.h,v 1.6 2008/01/03 20:47:55 tgl Exp $
* $PostgreSQL: pgsql/src/include/regex/regguts.h,v 1.7 2008/02/14 17:33:37 tgl Exp $
*/
@ -181,7 +181,7 @@ union tree
#define tcolor colors.ccolor
#define tptr ptrs.pptr
/* internal per-color structure for the color machinery */
/* internal per-color descriptor structure for the color machinery */
struct colordesc
{
uchr nchrs; /* number of chars of this color */
@ -228,11 +228,11 @@ struct colormap
#endif
/*
* Interface definitions for locale-interface functions in locale.c.
* Multi-character collating elements (MCCEs) cause most of the trouble.
*/
/* Representation of a set of characters. */
struct cvec
{
int nchrs; /* number of chrs */
@ -241,17 +241,9 @@ struct cvec
int nranges; /* number of ranges (chr pairs) */
int rangespace; /* number of chrs possible */
chr *ranges; /* pointer to vector of chr pairs */
int nmcces; /* number of MCCEs */
int mccespace; /* number of MCCEs possible */
int nmccechrs; /* number of chrs used for MCCEs */
chr *mcces[1]; /* pointers to 0-terminated MCCEs */
/* and both batches of chrs are on the end */
/* both batches of chrs are on the end */
};
/* caution: this value cannot be changed easily */
#define MAXMCCE 2 /* length of longest MCCE */
/*
* definitions for NFA internal representation