Sync regex code with Tcl 8.6.4.

Sync our regex code with upstream changes since last time we did this,
which was Tcl 8.5.11 (see commit 08fd6ff37f).

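The only functional change here is to stop treating an octal escape as
three digits long when its value would exceed \377; in that case the third
digit is pushed back and lexed separately (so \400 is now read as \40
followed by a literal "0").  That's a bug fix, but it's a minor one and
could change the interpretation of working regexes, so don't back-patch.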

In addition to that, s/INFINITY/DUPINF/ to eliminate the risk of collisions
with <math.h>'s macro, and s/LOCAL/NOPROP/ because that also seems like
an unnecessarily collision-prone macro name.

There were some other cosmetic changes in their copy that I did not adopt,
notably a rather half-hearted attempt at renaming some of the C functions
in a more verbose style.  (I'm not necessarily against the concept, but
renaming just a few functions in the package is not an improvement.)
This commit is contained in:
Tom Lane 2015-09-16 15:25:25 -04:00
parent d0f18cde7e
commit b44d92b67b
4 changed files with 21 additions and 18 deletions

View File

@ -860,6 +860,12 @@ lexescape(struct vars * v)
c = lexdigits(v, 8, 1, 3);
if (ISERR())
FAILW(REG_EESCAPE);
if (c > 0xff)
{
/* out of range, so we handled one digit too much */
v->now--;
c >>= 3;
}
RETV(PLAIN, c);
break;
default:

View File

@ -960,13 +960,13 @@ parseqatom(struct vars * v,
{
case '*':
m = 0;
n = INFINITY;
n = DUPINF;
qprefer = (v->nextvalue) ? LONGER : SHORTER;
NEXT();
break;
case '+':
m = 1;
n = INFINITY;
n = DUPINF;
qprefer = (v->nextvalue) ? LONGER : SHORTER;
NEXT();
break;
@ -984,7 +984,7 @@ parseqatom(struct vars * v,
if (SEE(DIGIT))
n = scannum(v);
else
n = INFINITY;
n = DUPINF;
if (m > n)
{
ERR(REG_BADBR);
@ -1146,8 +1146,8 @@ parseqatom(struct vars * v,
* really care where its submatches are.
*/
dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
assert(m >= 1 && m != INFINITY && n >= 1);
repeat(v, s, atom->begin, m - 1, (n == INFINITY) ? n : n - 1);
assert(m >= 1 && m != DUPINF && n >= 1);
repeat(v, s, atom->begin, m - 1, (n == DUPINF) ? n : n - 1);
f = COMBINE(qprefer, atom->flags);
t = subre(v, '.', f, s, atom->end); /* prefix and atom */
NOERR();
@ -1268,7 +1268,7 @@ repeat(struct vars * v,
#define SOME 2
#define INF 3
#define PAIR(x, y) ((x)*4 + (y))
#define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
#define REDUCE(x) ( ((x) == DUPINF) ? INF : (((x) > 1) ? SOME : (x)) )
const int rm = REDUCE(m);
const int rn = REDUCE(n);
struct state *s;
@ -2026,7 +2026,7 @@ stdump(struct subre * t,
if (t->min != 1 || t->max != 1)
{
fprintf(f, " {%d,", t->min);
if (t->max != INFINITY)
if (t->max != DUPINF)
fprintf(f, "%d", t->max);
fprintf(f, "}");
}

View File

@ -865,7 +865,7 @@ cbrdissect(struct vars * v,
if (tlen % brlen != 0)
return REG_NOMATCH;
numreps = tlen / brlen;
if (numreps < min || (numreps > max && max != INFINITY))
if (numreps < min || (numreps > max && max != DUPINF))
return REG_NOMATCH;
/* okay, compare the actual string contents */
@ -964,7 +964,7 @@ citerdissect(struct vars * v,
* sub-match endpoints in endpts[1..max_matches].
*/
max_matches = end - begin;
if (max_matches > t->max && t->max != INFINITY)
if (max_matches > t->max && t->max != DUPINF)
max_matches = t->max;
if (max_matches < min_matches)
max_matches = min_matches;
@ -1149,7 +1149,7 @@ creviterdissect(struct vars * v,
* sub-match endpoints in endpts[1..max_matches].
*/
max_matches = end - begin;
if (max_matches > t->max && t->max != INFINITY)
if (max_matches > t->max && t->max != DUPINF)
max_matches = t->max;
if (max_matches < min_matches)
max_matches = min_matches;

View File

@ -78,9 +78,6 @@
#endif
/* want size of a char in bits, and max value in bounded quantifiers */
#ifndef CHAR_BIT
#include <limits.h>
#endif
#ifndef _POSIX2_RE_DUP_MAX
#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
#endif
@ -95,7 +92,7 @@
#define xxx 1
#define DUPMAX _POSIX2_RE_DUP_MAX
#define INFINITY (DUPMAX+1)
#define DUPINF (DUPMAX+1)
#define REMAGIC 0xfed7 /* magic number for main struct */
@ -419,15 +416,15 @@ struct subre
#define LONGER 01 /* prefers longer match */
#define SHORTER 02 /* prefers shorter match */
#define MIXED 04 /* mixed preference below */
#define CAP 010 /* capturing parens below */
#define CAP 010 /* capturing parens below */
#define BACKR 020 /* back reference below */
#define INUSE 0100 /* in use in final tree */
#define LOCAL 03 /* bits which may not propagate up */
#define NOPROP 03 /* bits which may not propagate up */
#define LMIX(f) ((f)<<2) /* LONGER -> MIXED */
#define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */
#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
#define UP(f) (((f)&~NOPROP) | (LMIX(f) & SMIX(f) & MIXED))
#define MESSY(f) ((f)&(MIXED|CAP|BACKR))
#define PREF(f) ((f)&LOCAL)
#define PREF(f) ((f)&NOPROP)
#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
short id; /* ID of subre (1..ntree-1) */