diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c index 6f2c0cb3eb..00da05571a 100644 --- a/src/backend/regex/regc_lex.c +++ b/src/backend/regex/regc_lex.c @@ -792,13 +792,13 @@ lexescape(struct vars * v) break; case CHR('u'): c = lexdigits(v, 16, 4, 4); - if (ISERR()) + if (ISERR() || c < CHR_MIN || c > CHR_MAX) FAILW(REG_EESCAPE); RETV(PLAIN, c); break; case CHR('U'): c = lexdigits(v, 16, 8, 8); - if (ISERR()) + if (ISERR() || c < CHR_MIN || c > CHR_MAX) FAILW(REG_EESCAPE); RETV(PLAIN, c); break; @@ -816,7 +816,7 @@ lexescape(struct vars * v) case CHR('x'): NOTE(REG_UUNPORT); c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ - if (ISERR()) + if (ISERR() || c < CHR_MIN || c > CHR_MAX) FAILW(REG_EESCAPE); RETV(PLAIN, c); break; @@ -872,6 +872,9 @@ lexescape(struct vars * v) /* * lexdigits - slurp up digits and return chr value + * + * This does not account for overflow; callers should range-check the result + * if maxlen is large enough to make that possible. */ static chr /* chr value; errors signalled via ERR */ lexdigits(struct vars * v, diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c index e7bbb50ef4..4fe62921e3 100644 --- a/src/backend/regex/regc_locale.c +++ b/src/backend/regex/regc_locale.c @@ -408,8 +408,7 @@ range(struct vars * v, /* context */ int nchrs; struct cvec *cv; celt c, - lc, - uc; + cc; if (a != b && !before(a, b)) { @@ -427,24 +426,51 @@ range(struct vars * v, /* context */ /* * When case-independent, it's hard to decide when cvec ranges are usable, - * so for now at least, we won't try. We allocate enough space for two - * case variants plus a little extra for the two title case variants. + * so for now at least, we won't try. We use a range for the originally + * specified chrs and then add on any case-equivalents that are outside + * that range as individual chrs. + * + * To ensure sane behavior if someone specifies a very large range, limit + * the allocation size to 100000 chrs (arbitrary) and check for overrun + * inside the loop below. */ + nchrs = b - a + 1; + if (nchrs <= 0 || nchrs > 100000) + nchrs = 100000; - nchrs = (b - a + 1) * 2 + 4; - - cv = getcvec(v, nchrs, 0); + cv = getcvec(v, nchrs, 1); NOERRN(); + addrange(cv, a, b); for (c = a; c <= b; c++) { - addchr(cv, c); - lc = pg_wc_tolower((chr) c); - if (c != lc) - addchr(cv, lc); - uc = pg_wc_toupper((chr) c); - if (c != uc) - addchr(cv, uc); + cc = pg_wc_tolower((chr) c); + if (cc != c && + (before(cc, a) || before(b, cc))) + { + if (cv->nchrs >= cv->chrspace) + { + ERR(REG_ETOOBIG); + return NULL; + } + addchr(cv, cc); + } + cc = pg_wc_toupper((chr) c); + if (cc != c && + (before(cc, a) || before(b, cc))) + { + if (cv->nchrs >= cv->chrspace) + { + ERR(REG_ETOOBIG); + return NULL; + } + addchr(cv, cc); + } + if (CANCEL_REQUESTED(v->re)) + { + ERR(REG_CANCEL); + return NULL; + } } return cv; diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 487b5dabb8..7ae9673a7d 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -1586,6 +1586,7 @@ dovec(struct vars * v, { ch = *p; newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); + NOERR(); } /* and the ranges */ @@ -1595,6 +1596,7 @@ dovec(struct vars * v, to = *(p + 1); if (from <= to) subrange(v, from, to, lp, rp); + NOERR(); } } diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h index dbb461a0ce..3f1d14e190 100644 --- a/src/include/regex/regcustom.h +++ b/src/include/regex/regcustom.h @@ -65,7 +65,8 @@ typedef int celt; /* type to hold chr, or NOCELT */ #define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ #define CHRBITS 32 /* bits in a chr; must not use sizeof */ #define CHR_MIN 0x00000000 /* smallest and largest chr; the value */ -#define CHR_MAX 0xfffffffe /* CHR_MAX-CHR_MIN+1 should fit in uchr */ +#define CHR_MAX 0x7ffffffe /* CHR_MAX-CHR_MIN+1 must fit in an int, and + * CHR_MAX+1 must fit in both chr and celt */ /* functions operating on chr */ #define iscalnum(x) pg_wc_isalnum(x) diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out index ba2923982f..2b4f2ec252 100644 --- a/src/test/regress/expected/regex.out +++ b/src/test/regress/expected/regex.out @@ -326,3 +326,5 @@ select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs ERROR: invalid regular expression: invalid backreference number select 'xyz' ~ 'x(\w)(?=(\1))'; ERROR: invalid regular expression: invalid backreference number +select 'a' ~ '\x7fffffff'; -- invalid chr code +ERROR: invalid regular expression: invalid escape \ sequence diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql index 7cf5e59982..635f068eae 100644 --- a/src/test/regress/sql/regex.sql +++ b/src/test/regress/sql/regex.sql @@ -86,3 +86,4 @@ select 'a' ~ '()+\1'; -- Error conditions select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs select 'xyz' ~ 'x(\w)(?=(\1))'; +select 'a' ~ '\x7fffffff'; -- invalid chr code