diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index a6d2ffbf9e..59921b46cf 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -22,6 +22,7 @@ SUBDIRS = \ test_pg_dump \ test_predtest \ test_rbtree \ + test_regex \ test_rls_hooks \ test_shm_mq \ unsafe_tests \ diff --git a/src/test/modules/test_regex/.gitignore b/src/test/modules/test_regex/.gitignore new file mode 100644 index 0000000000..5dcb3ff972 --- /dev/null +++ b/src/test/modules/test_regex/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/modules/test_regex/Makefile b/src/test/modules/test_regex/Makefile new file mode 100644 index 0000000000..dfbc5dc44b --- /dev/null +++ b/src/test/modules/test_regex/Makefile @@ -0,0 +1,23 @@ +# src/test/modules/test_regex/Makefile + +MODULE_big = test_regex +OBJS = \ + $(WIN32RES) \ + test_regex.o +PGFILEDESC = "test_regex - test code for backend/regex/" + +EXTENSION = test_regex +DATA = test_regex--1.0.sql + +REGRESS = test_regex test_regex_utf8 + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_regex +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_regex/README b/src/test/modules/test_regex/README new file mode 100644 index 0000000000..3ef152d4e1 --- /dev/null +++ b/src/test/modules/test_regex/README @@ -0,0 +1,78 @@ +test_regex is a module for testing the regular expression package. +It is mostly meant to allow us to absorb Tcl's regex test suite. +Therefore, there are provisions to exercise regex features that +aren't currently exposed at the SQL level by PostgreSQL. + +Currently, one function is provided: + +test_regex(pattern text, string text, flags text) returns setof text[] + +Reports an error if the pattern is an invalid regex. 
Otherwise, +the first row of output contains the number of subexpressions, +followed by words reporting set bit(s) in the regex's re_info field. +If the pattern doesn't match the string, that's all. +If the pattern does match, the next row contains the whole match +as the first array element. If there are parenthesized subexpression(s), +following array elements contain the matches to those subexpressions. +If the "g" (global) flag is set, then additional row(s) of output similarly +report any additional matches. + +The "flags" argument is a string of zero or more single-character +flags that modify the behavior of the regex package or the test +function. As described in Tcl's reg.test file: + +The flag characters are complex and a bit eclectic. Generally speaking, +lowercase letters are compile options, uppercase are expected re_info +bits, and nonalphabetics are match options, controls for how the test is +run, or testing options. The one small surprise is that AREs are the +default, and you must explicitly request lesser flavors of RE. The flags +are as follows. It is admitted that some are not very mnemonic. + + - no-op (placeholder) + 0 report indices not actual strings + (This substitutes for Tcl's -indices switch) + ! expect partial match, report start position anyway + % force small state-set cache in matcher (to test cache replace) + ^ beginning of string is not beginning of line + $ end of string is not end of line + * test is Unicode-specific, needs big character set + + provide fake xy equivalence class and ch collating element + (Note: the equivalence class is implemented, the + collating element is not; so references to [.ch.] fail) + , set REG_PROGRESS (only useful in REG_DEBUG builds) + . 
set REG_DUMP (only useful in REG_DEBUG builds) + : set REG_MTRACE (only useful in REG_DEBUG builds) + ; set REG_FTRACE (only useful in REG_DEBUG builds) + + & test as both ARE and BRE + (Not implemented in Postgres, we use separate tests) + b BRE + e ERE + a turn advanced-features bit on (error unless ERE already) + q literal string, no metacharacters at all + + g global match (find all matches) + i case-independent matching + o ("opaque") do not return match locations + p newlines are half-magic, excluded from . and [^ only + w newlines are half-magic, significant to ^ and $ only + n newlines are fully magic, both effects + x expanded RE syntax + t incomplete-match reporting + c canmatch (equivalent to "t0!", in Postgres implementation) + s match only at start (REG_BOSONLY) + + A backslash-_a_lphanumeric seen + B ERE/ARE literal-_b_race heuristic used + E backslash (_e_scape) seen within [] + H looka_h_ead constraint seen + I _i_mpossible to match + L _l_ocale-specific construct seen + M unportable (_m_achine-specific) construct seen + N RE can match empty (_n_ull) string + P non-_P_OSIX construct seen + Q {} _q_uantifier seen + R back _r_eference seen + S POSIX-un_s_pecified syntax seen + T prefers shortest (_t_iny) + U saw original-POSIX botch: unmatched right paren in ERE (_u_gh) diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out new file mode 100644 index 0000000000..b62cbac697 --- /dev/null +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -0,0 +1,4551 @@ +-- This file is based on tests/reg.test from the Tcl distribution, +-- which is marked +-- # Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. +-- The full copyright notice can be found in src/backend/regex/COPYRIGHT. +-- Most commented lines below are copied from reg.test. Each +-- test case is followed by an equivalent test using test_regex(). 
+create extension test_regex; +set standard_conforming_strings = on; +-- # support functions and preliminary misc. +-- # This is sensitive to changes in message wording, but we really have to +-- # test the code->message expansion at least once. +-- ::tcltest::test reg-0.1 "regexp error reporting" { +-- list [catch {regexp (*) ign} msg] $msg +-- } {1 {couldn't compile regular expression pattern: quantifier operand invalid}} +select * from test_regex('(*)', '', ''); +ERROR: invalid regular expression: quantifier operand invalid +-- doing 1 "basic sanity checks" +-- expectMatch 1.1 & abc abc abc +select * from test_regex('abc', 'abc', ''); + test_regex +------------ + {0} + {abc} +(2 rows) + +select * from test_regex('abc', 'abc', 'b'); + test_regex +------------ + {0} + {abc} +(2 rows) + +-- expectNomatch 1.2 & abc def +select * from test_regex('abc', 'def', ''); + test_regex +------------ + {0} +(1 row) + +select * from test_regex('abc', 'def', 'b'); + test_regex +------------ + {0} +(1 row) + +-- expectMatch 1.3 & abc xyabxabce abc +select * from test_regex('abc', 'xyabxabce', ''); + test_regex +------------ + {0} + {abc} +(2 rows) + +select * from test_regex('abc', 'xyabxabce', 'b'); + test_regex +------------ + {0} + {abc} +(2 rows) + +-- doing 2 "invalid option combinations" +-- expectError 2.1 qe a INVARG +select * from test_regex('a', '', 'qe'); +ERROR: invalid regular expression: invalid argument to regex function +-- expectError 2.2 qa a INVARG +select * from test_regex('a', '', 'qa'); +ERROR: invalid regular expression: invalid argument to regex function +-- expectError 2.3 qx a INVARG +select * from test_regex('a', '', 'qx'); +ERROR: invalid regular expression: invalid argument to regex function +-- expectError 2.4 qn a INVARG +select * from test_regex('a', '', 'qn'); +ERROR: invalid regular expression: invalid argument to regex function +-- expectError 2.5 ba a INVARG +select * from test_regex('a', '', 'ba'); +ERROR: invalid regular expression: invalid 
argument to regex function +-- doing 3 "basic syntax" +-- expectIndices 3.1 &NS "" a {0 -1} +select * from test_regex('', 'a', '0NS'); + test_regex +--------------------------------- + {0,REG_UUNSPEC,REG_UEMPTYMATCH} + {"0 -1"} +(2 rows) + +select * from test_regex('', 'a', '0NSb'); + test_regex +--------------------------------- + {0,REG_UUNSPEC,REG_UEMPTYMATCH} + {"0 -1"} +(2 rows) + +-- expectMatch 3.2 NS a| a a +select * from test_regex('a|', 'a', 'NS'); + test_regex +--------------------------------- + {0,REG_UUNSPEC,REG_UEMPTYMATCH} + {a} +(2 rows) + +-- expectMatch 3.3 - a|b a a +select * from test_regex('a|b', 'a', '-'); + test_regex +------------ + {0} + {a} +(2 rows) + +-- expectMatch 3.4 - a|b b b +select * from test_regex('a|b', 'b', '-'); + test_regex +------------ + {0} + {b} +(2 rows) + +-- expectMatch 3.5 NS a||b b b +select * from test_regex('a||b', 'b', 'NS'); + test_regex +--------------------------------- + {0,REG_UUNSPEC,REG_UEMPTYMATCH} + {b} +(2 rows) + +-- expectMatch 3.6 & ab ab ab +select * from test_regex('ab', 'ab', ''); + test_regex +------------ + {0} + {ab} +(2 rows) + +select * from test_regex('ab', 'ab', 'b'); + test_regex +------------ + {0} + {ab} +(2 rows) + +-- doing 4 "parentheses" +-- expectMatch 4.1 - (a)e ae ae a +select * from test_regex('(a)e', 'ae', '-'); + test_regex +------------ + {1} + {ae,a} +(2 rows) + +-- expectMatch 4.2 o (a)e ae +select * from test_regex('(a)e', 'ae', 'o'); + test_regex +------------ + {0} + {NULL} +(2 rows) + +-- expectMatch 4.3 b {\(a\)b} ab ab a +select * from test_regex('\(a\)b', 'ab', 'b'); + test_regex +------------ + {1} + {ab,a} +(2 rows) + +-- expectMatch 4.4 - a((b)c) abc abc bc b +select * from test_regex('a((b)c)', 'abc', '-'); + test_regex +------------ + {2} + {abc,bc,b} +(2 rows) + +-- expectMatch 4.5 - a(b)(c) abc abc b c +select * from test_regex('a(b)(c)', 'abc', '-'); + test_regex +------------ + {2} + {abc,b,c} +(2 rows) + +-- expectError 4.6 - a(b EPAREN +select * from 
test_regex('a(b', '', '-'); +ERROR: invalid regular expression: parentheses () not balanced +-- expectError 4.7 b {a\(b} EPAREN +select * from test_regex('a\(b', '', 'b'); +ERROR: invalid regular expression: parentheses () not balanced +-- # sigh, we blew it on the specs here... someday this will be fixed in POSIX, +-- # but meanwhile, it's fixed in AREs +-- expectMatch 4.8 eU a)b a)b a)b +select * from test_regex('a)b', 'a)b', 'eU'); + test_regex +----------------- + {0,REG_UPBOTCH} + {a)b} +(2 rows) + +-- expectError 4.9 - a)b EPAREN +select * from test_regex('a)b', '', '-'); +ERROR: invalid regular expression: parentheses () not balanced +-- expectError 4.10 b {a\)b} EPAREN +select * from test_regex('a\)b', '', 'b'); +ERROR: invalid regular expression: parentheses () not balanced +-- expectMatch 4.11 P a(?:b)c abc abc +select * from test_regex('a(?:b)c', 'abc', 'P'); + test_regex +------------------- + {0,REG_UNONPOSIX} + {abc} +(2 rows) + +-- expectError 4.12 e a(?:b)c BADRPT +select * from test_regex('a(?:b)c', '', 'e'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectIndices 4.13 S a()b ab {0 1} {1 0} +select * from test_regex('a()b', 'ab', '0S'); + test_regex +----------------- + {1,REG_UUNSPEC} + {"0 1","1 0"} +(2 rows) + +-- expectMatch 4.14 SP a(?:)b ab ab +select * from test_regex('a(?:)b', 'ab', 'SP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_UUNSPEC} + {ab} +(2 rows) + +-- expectIndices 4.15 S a(|b)c ac {0 1} {1 0} +select * from test_regex('a(|b)c', 'ac', '0S'); + test_regex +----------------- + {1,REG_UUNSPEC} + {"0 1","1 0"} +(2 rows) + +-- expectMatch 4.16 S a(b|)c abc abc b +select * from test_regex('a(b|)c', 'abc', 'S'); + test_regex +----------------- + {1,REG_UUNSPEC} + {abc,b} +(2 rows) + +-- doing 5 "simple one-char matching" +-- # general case of brackets done later +-- expectMatch 5.1 & a.b axb axb +select * from test_regex('a.b', 'axb', ''); + test_regex +------------ + {0} + {axb} +(2 
rows) + +select * from test_regex('a.b', 'axb', 'b'); + test_regex +------------ + {0} + {axb} +(2 rows) + +-- expectNomatch 5.2 &n "a.b" "a\nb" +select * from test_regex('a.b', E'a\nb', 'n'); + test_regex +------------ + {0} +(1 row) + +select * from test_regex('a.b', E'a\nb', 'nb'); + test_regex +------------ + {0} +(1 row) + +-- expectMatch 5.3 & {a[bc]d} abd abd +select * from test_regex('a[bc]d', 'abd', ''); + test_regex +------------ + {0} + {abd} +(2 rows) + +select * from test_regex('a[bc]d', 'abd', 'b'); + test_regex +------------ + {0} + {abd} +(2 rows) + +-- expectMatch 5.4 & {a[bc]d} acd acd +select * from test_regex('a[bc]d', 'acd', ''); + test_regex +------------ + {0} + {acd} +(2 rows) + +select * from test_regex('a[bc]d', 'acd', 'b'); + test_regex +------------ + {0} + {acd} +(2 rows) + +-- expectNomatch 5.5 & {a[bc]d} aed +select * from test_regex('a[bc]d', 'aed', ''); + test_regex +------------ + {0} +(1 row) + +select * from test_regex('a[bc]d', 'aed', 'b'); + test_regex +------------ + {0} +(1 row) + +-- expectNomatch 5.6 & {a[^bc]d} abd +select * from test_regex('a[^bc]d', 'abd', ''); + test_regex +------------ + {0} +(1 row) + +select * from test_regex('a[^bc]d', 'abd', 'b'); + test_regex +------------ + {0} +(1 row) + +-- expectMatch 5.7 & {a[^bc]d} aed aed +select * from test_regex('a[^bc]d', 'aed', ''); + test_regex +------------ + {0} + {aed} +(2 rows) + +select * from test_regex('a[^bc]d', 'aed', 'b'); + test_regex +------------ + {0} + {aed} +(2 rows) + +-- expectNomatch 5.8 &p "a\[^bc]d" "a\nd" +select * from test_regex('a[^bc]d', E'a\nd', 'p'); + test_regex +------------ + {0} +(1 row) + +select * from test_regex('a[^bc]d', E'a\nd', 'pb'); + test_regex +------------ + {0} +(1 row) + +-- doing 6 "context-dependent syntax" +-- # plus odds and ends +-- expectError 6.1 - * BADRPT +select * from test_regex('*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectMatch 6.2 b * * * +select * from test_regex('*', 
'*', 'b'); + test_regex +------------ + {0} + {*} +(2 rows) + +-- expectMatch 6.3 b {\(*\)} * * * +select * from test_regex('\(*\)', '*', 'b'); + test_regex +------------ + {1} + {*,*} +(2 rows) + +-- expectError 6.4 - (*) BADRPT +select * from test_regex('(*)', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectMatch 6.5 b ^* * * +select * from test_regex('^*', '*', 'b'); + test_regex +------------ + {0} + {*} +(2 rows) + +-- expectError 6.6 - ^* BADRPT +select * from test_regex('^*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectNomatch 6.7 & ^b ^b +select * from test_regex('^b', '^b', ''); + test_regex +------------ + {0} +(1 row) + +select * from test_regex('^b', '^b', 'b'); + test_regex +------------ + {0} +(1 row) + +-- expectMatch 6.8 b x^ x^ x^ +select * from test_regex('x^', 'x^', 'b'); + test_regex +------------ + {0} + {x^} +(2 rows) + +-- expectNomatch 6.9 I x^ x +select * from test_regex('x^', 'x', 'I'); + test_regex +--------------------- + {0,REG_UIMPOSSIBLE} +(1 row) + +-- expectMatch 6.10 n "\n^" "x\nb" "\n" +select * from test_regex(E'\n^', E'x\nb', 'n'); + test_regex +------------ + {0} + {" + + "} +(2 rows) + +-- expectNomatch 6.11 bS {\(^b\)} ^b +select * from test_regex('\(^b\)', '^b', 'bS'); + test_regex +----------------- + {1,REG_UUNSPEC} +(1 row) + +-- expectMatch 6.12 - (^b) b b b +select * from test_regex('(^b)', 'b', '-'); + test_regex +------------ + {1} + {b,b} +(2 rows) + +-- expectMatch 6.13 & {x$} x x +select * from test_regex('x$', 'x', ''); + test_regex +------------ + {0} + {x} +(2 rows) + +select * from test_regex('x$', 'x', 'b'); + test_regex +------------ + {0} + {x} +(2 rows) + +-- expectMatch 6.14 bS {\(x$\)} x x x +select * from test_regex('\(x$\)', 'x', 'bS'); + test_regex +----------------- + {1,REG_UUNSPEC} + {x,x} +(2 rows) + +-- expectMatch 6.15 - {(x$)} x x x +select * from test_regex('(x$)', 'x', '-'); + test_regex +------------ + {1} + {x,x} +(2 
rows) + +-- expectMatch 6.16 b {x$y} "x\$y" "x\$y" +select * from test_regex('x$y', 'x$y', 'b'); + test_regex +------------ + {0} + {x$y} +(2 rows) + +-- expectNomatch 6.17 I {x$y} xy +select * from test_regex('x$y', 'xy', 'I'); + test_regex +--------------------- + {0,REG_UIMPOSSIBLE} +(1 row) + +-- expectMatch 6.18 n "x\$\n" "x\n" "x\n" +select * from test_regex(E'x$\n', E'x\n', 'n'); + test_regex +------------ + {0} + {"x + + "} +(2 rows) + +-- expectError 6.19 - + BADRPT +select * from test_regex('+', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 6.20 - ? BADRPT +select * from test_regex('?', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- These two are not yet incorporated in Tcl, cf +-- https://core.tcl-lang.org/tcl/artifact/106269fa65d96b83 +-- expectError 6.21 - {x(\w)(?=(\1))} ESUBREG +select * from test_regex('x(\w)(?=(\1))', '', '-'); +ERROR: invalid regular expression: invalid backreference number +-- expectMatch 6.22 HP {x(?=((foo)))} xfoo x +select * from test_regex('x(?=((foo)))', 'xfoo', 'HP'); + test_regex +----------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX} + {x} +(2 rows) + +-- doing 7 "simple quantifiers" +-- expectMatch 7.1 &N a* aa aa +select * from test_regex('a*', 'aa', 'N'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {aa} +(2 rows) + +select * from test_regex('a*', 'aa', 'Nb'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {aa} +(2 rows) + +-- expectIndices 7.2 &N a* b {0 -1} +select * from test_regex('a*', 'b', '0N'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {"0 -1"} +(2 rows) + +select * from test_regex('a*', 'b', '0Nb'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {"0 -1"} +(2 rows) + +-- expectMatch 7.3 - a+ aa aa +select * from test_regex('a+', 'aa', '-'); + test_regex +------------ + {0} + {aa} +(2 rows) + +-- expectMatch 7.4 - a?b ab ab +select * from test_regex('a?b', 'ab', 
'-'); + test_regex +------------ + {0} + {ab} +(2 rows) + +-- expectMatch 7.5 - a?b b b +select * from test_regex('a?b', 'b', '-'); + test_regex +------------ + {0} + {b} +(2 rows) + +-- expectError 7.6 - ** BADRPT +select * from test_regex('**', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectMatch 7.7 bN ** *** *** +select * from test_regex('**', '***', 'bN'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {***} +(2 rows) + +-- expectError 7.8 & a** BADRPT +select * from test_regex('a**', '', ''); +ERROR: invalid regular expression: quantifier operand invalid +select * from test_regex('a**', '', 'b'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 7.9 & a**b BADRPT +select * from test_regex('a**b', '', ''); +ERROR: invalid regular expression: quantifier operand invalid +select * from test_regex('a**b', '', 'b'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 7.10 & *** BADRPT +select * from test_regex('***', '', ''); +ERROR: invalid regular expression: quantifier operand invalid +select * from test_regex('***', '', 'b'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 7.11 - a++ BADRPT +select * from test_regex('a++', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 7.12 - a?+ BADRPT +select * from test_regex('a?+', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 7.13 - a?* BADRPT +select * from test_regex('a?*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 7.14 - a+* BADRPT +select * from test_regex('a+*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 7.15 - a*+ BADRPT +select * from test_regex('a*+', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- doing 8 "braces" +-- expectMatch 8.1 NQ "a{0,1}" "" "" +select * from 
test_regex('a{0,1}', '', 'NQ'); + test_regex +--------------------------------- + {0,REG_UBOUNDS,REG_UEMPTYMATCH} + {""} +(2 rows) + +-- expectMatch 8.2 NQ "a{0,1}" ac a +select * from test_regex('a{0,1}', 'ac', 'NQ'); + test_regex +--------------------------------- + {0,REG_UBOUNDS,REG_UEMPTYMATCH} + {a} +(2 rows) + +-- expectError 8.3 - "a{1,0}" BADBR +select * from test_regex('a{1,0}', '', '-'); +ERROR: invalid regular expression: invalid repetition count(s) +-- expectError 8.4 - "a{1,2,3}" BADBR +select * from test_regex('a{1,2,3}', '', '-'); +ERROR: invalid regular expression: invalid repetition count(s) +-- expectError 8.5 - "a{257}" BADBR +select * from test_regex('a{257}', '', '-'); +ERROR: invalid regular expression: invalid repetition count(s) +-- expectError 8.6 - "a{1000}" BADBR +select * from test_regex('a{1000}', '', '-'); +ERROR: invalid regular expression: invalid repetition count(s) +-- expectError 8.7 - "a{1" EBRACE +select * from test_regex('a{1', '', '-'); +ERROR: invalid regular expression: braces {} not balanced +-- expectError 8.8 - "a{1n}" BADBR +select * from test_regex('a{1n}', '', '-'); +ERROR: invalid regular expression: invalid repetition count(s) +-- expectMatch 8.9 BS "a{b" "a\{b" "a\{b" +select * from test_regex('a{b', 'a{b', 'BS'); + test_regex +----------------------------- + {0,REG_UBRACES,REG_UUNSPEC} + {"a{b"} +(2 rows) + +-- expectMatch 8.10 BS "a{" "a\{" "a\{" +select * from test_regex('a{', 'a{', 'BS'); + test_regex +----------------------------- + {0,REG_UBRACES,REG_UUNSPEC} + {"a{"} +(2 rows) + +-- expectMatch 8.11 bQ "a\\{0,1\\}b" cb b +select * from test_regex('a\{0,1\}b', 'cb', 'bQ'); + test_regex +----------------- + {0,REG_UBOUNDS} + {b} +(2 rows) + +-- expectError 8.12 b "a\\{0,1" EBRACE +select * from test_regex('a\{0,1', '', 'b'); +ERROR: invalid regular expression: braces {} not balanced +-- expectError 8.13 - "a{0,1\\" BADBR +select * from test_regex('a{0,1\', '', '-'); +ERROR: invalid regular expression: invalid 
repetition count(s) +-- expectMatch 8.14 Q "a{0}b" ab b +select * from test_regex('a{0}b', 'ab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {b} +(2 rows) + +-- expectMatch 8.15 Q "a{0,0}b" ab b +select * from test_regex('a{0,0}b', 'ab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {b} +(2 rows) + +-- expectMatch 8.16 Q "a{0,1}b" ab ab +select * from test_regex('a{0,1}b', 'ab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {ab} +(2 rows) + +-- expectMatch 8.17 Q "a{0,2}b" b b +select * from test_regex('a{0,2}b', 'b', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {b} +(2 rows) + +-- expectMatch 8.18 Q "a{0,2}b" aab aab +select * from test_regex('a{0,2}b', 'aab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {aab} +(2 rows) + +-- expectMatch 8.19 Q "a{0,}b" aab aab +select * from test_regex('a{0,}b', 'aab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {aab} +(2 rows) + +-- expectMatch 8.20 Q "a{1,1}b" aab ab +select * from test_regex('a{1,1}b', 'aab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {ab} +(2 rows) + +-- expectMatch 8.21 Q "a{1,3}b" aaaab aaab +select * from test_regex('a{1,3}b', 'aaaab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {aaab} +(2 rows) + +-- expectNomatch 8.22 Q "a{1,3}b" b +select * from test_regex('a{1,3}b', 'b', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} +(1 row) + +-- expectMatch 8.23 Q "a{1,}b" aab aab +select * from test_regex('a{1,}b', 'aab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {aab} +(2 rows) + +-- expectNomatch 8.24 Q "a{2,3}b" ab +select * from test_regex('a{2,3}b', 'ab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} +(1 row) + +-- expectMatch 8.25 Q "a{2,3}b" aaaab aaab +select * from test_regex('a{2,3}b', 'aaaab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {aaab} +(2 rows) + +-- expectNomatch 8.26 Q "a{2,}b" ab +select * from test_regex('a{2,}b', 'ab', 'Q'); + test_regex 
+----------------- + {0,REG_UBOUNDS} +(1 row) + +-- expectMatch 8.27 Q "a{2,}b" aaaab aaaab +select * from test_regex('a{2,}b', 'aaaab', 'Q'); + test_regex +----------------- + {0,REG_UBOUNDS} + {aaaab} +(2 rows) + +-- doing 9 "brackets" +-- expectMatch 9.1 & {a[bc]} ac ac +select * from test_regex('a[bc]', 'ac', ''); + test_regex +------------ + {0} + {ac} +(2 rows) + +select * from test_regex('a[bc]', 'ac', 'b'); + test_regex +------------ + {0} + {ac} +(2 rows) + +-- expectMatch 9.2 & {a[-]} a- a- +select * from test_regex('a[-]', 'a-', ''); + test_regex +------------ + {0} + {a-} +(2 rows) + +select * from test_regex('a[-]', 'a-', 'b'); + test_regex +------------ + {0} + {a-} +(2 rows) + +-- expectMatch 9.3 & {a[[.-.]]} a- a- +select * from test_regex('a[[.-.]]', 'a-', ''); + test_regex +------------ + {0} + {a-} +(2 rows) + +select * from test_regex('a[[.-.]]', 'a-', 'b'); + test_regex +------------ + {0} + {a-} +(2 rows) + +-- expectMatch 9.4 &L {a[[.zero.]]} a0 a0 +select * from test_regex('a[[.zero.]]', 'a0', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {a0} +(2 rows) + +select * from test_regex('a[[.zero.]]', 'a0', 'Lb'); + test_regex +----------------- + {0,REG_ULOCALE} + {a0} +(2 rows) + +-- expectMatch 9.5 &LM {a[[.zero.]-9]} a2 a2 +select * from test_regex('a[[.zero.]-9]', 'a2', 'LM'); + test_regex +----------------------------- + {0,REG_UUNPORT,REG_ULOCALE} + {a2} +(2 rows) + +select * from test_regex('a[[.zero.]-9]', 'a2', 'LMb'); + test_regex +----------------------------- + {0,REG_UUNPORT,REG_ULOCALE} + {a2} +(2 rows) + +-- expectMatch 9.6 &M {a[0-[.9.]]} a2 a2 +select * from test_regex('a[0-[.9.]]', 'a2', 'M'); + test_regex +----------------- + {0,REG_UUNPORT} + {a2} +(2 rows) + +select * from test_regex('a[0-[.9.]]', 'a2', 'Mb'); + test_regex +----------------- + {0,REG_UUNPORT} + {a2} +(2 rows) + +-- expectMatch 9.7 &+L {a[[=x=]]} ax ax +select * from test_regex('a[[=x=]]', 'ax', '+L'); + test_regex +----------------- + 
{0,REG_ULOCALE} + {ax} +(2 rows) + +select * from test_regex('a[[=x=]]', 'ax', '+Lb'); + test_regex +----------------- + {0,REG_ULOCALE} + {ax} +(2 rows) + +-- expectMatch 9.8 &+L {a[[=x=]]} ay ay +select * from test_regex('a[[=x=]]', 'ay', '+L'); + test_regex +----------------- + {0,REG_ULOCALE} + {ay} +(2 rows) + +select * from test_regex('a[[=x=]]', 'ay', '+Lb'); + test_regex +----------------- + {0,REG_ULOCALE} + {ay} +(2 rows) + +-- expectNomatch 9.9 &+L {a[[=x=]]} az +select * from test_regex('a[[=x=]]', 'az', '+L'); + test_regex +----------------- + {0,REG_ULOCALE} +(1 row) + +select * from test_regex('a[[=x=]]', 'az', '+Lb'); + test_regex +----------------- + {0,REG_ULOCALE} +(1 row) + +-- expectError 9.10 & {a[0-[=x=]]} ERANGE +select * from test_regex('a[0-[=x=]]', '', ''); +ERROR: invalid regular expression: invalid character range +select * from test_regex('a[0-[=x=]]', '', 'b'); +ERROR: invalid regular expression: invalid character range +-- expectMatch 9.11 &L {a[[:digit:]]} a0 a0 +select * from test_regex('a[[:digit:]]', 'a0', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {a0} +(2 rows) + +select * from test_regex('a[[:digit:]]', 'a0', 'Lb'); + test_regex +----------------- + {0,REG_ULOCALE} + {a0} +(2 rows) + +-- expectError 9.12 & {a[[:woopsie:]]} ECTYPE +select * from test_regex('a[[:woopsie:]]', '', ''); +ERROR: invalid regular expression: invalid character class +select * from test_regex('a[[:woopsie:]]', '', 'b'); +ERROR: invalid regular expression: invalid character class +-- expectNomatch 9.13 &L {a[[:digit:]]} ab +select * from test_regex('a[[:digit:]]', 'ab', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} +(1 row) + +select * from test_regex('a[[:digit:]]', 'ab', 'Lb'); + test_regex +----------------- + {0,REG_ULOCALE} +(1 row) + +-- expectError 9.14 & {a[0-[:digit:]]} ERANGE +select * from test_regex('a[0-[:digit:]]', '', ''); +ERROR: invalid regular expression: invalid character range +select * from 
test_regex('a[0-[:digit:]]', '', 'b'); +ERROR: invalid regular expression: invalid character range +-- expectMatch 9.15 &LP {[[:<:]]a} a a +select * from test_regex('[[:<:]]a', 'a', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +select * from test_regex('[[:<:]]a', 'a', 'LPb'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectMatch 9.16 &LP {a[[:>:]]} a a +select * from test_regex('a[[:>:]]', 'a', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +select * from test_regex('a[[:>:]]', 'a', 'LPb'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectError 9.17 & {a[[..]]b} ECOLLATE +select * from test_regex('a[[..]]b', '', ''); +ERROR: invalid regular expression: invalid collating element +select * from test_regex('a[[..]]b', '', 'b'); +ERROR: invalid regular expression: invalid collating element +-- expectError 9.18 & {a[[==]]b} ECOLLATE +select * from test_regex('a[[==]]b', '', ''); +ERROR: invalid regular expression: invalid collating element +select * from test_regex('a[[==]]b', '', 'b'); +ERROR: invalid regular expression: invalid collating element +-- expectError 9.19 & {a[[::]]b} ECTYPE +select * from test_regex('a[[::]]b', '', ''); +ERROR: invalid regular expression: invalid character class +select * from test_regex('a[[::]]b', '', 'b'); +ERROR: invalid regular expression: invalid character class +-- expectError 9.20 & {a[[.a} EBRACK +select * from test_regex('a[[.a', '', ''); +ERROR: invalid regular expression: brackets [] not balanced +select * from test_regex('a[[.a', '', 'b'); +ERROR: invalid regular expression: brackets [] not balanced +-- expectError 9.21 & {a[[=a} EBRACK +select * from test_regex('a[[=a', '', ''); +ERROR: invalid regular expression: brackets [] not balanced +select * from test_regex('a[[=a', '', 'b'); +ERROR: invalid 
regular expression: brackets [] not balanced +-- expectError 9.22 & {a[[:a} EBRACK +select * from test_regex('a[[:a', '', ''); +ERROR: invalid regular expression: brackets [] not balanced +select * from test_regex('a[[:a', '', 'b'); +ERROR: invalid regular expression: brackets [] not balanced +-- expectError 9.23 & {a[} EBRACK +select * from test_regex('a[', '', ''); +ERROR: invalid regular expression: brackets [] not balanced +select * from test_regex('a[', '', 'b'); +ERROR: invalid regular expression: brackets [] not balanced +-- expectError 9.24 & {a[b} EBRACK +select * from test_regex('a[b', '', ''); +ERROR: invalid regular expression: brackets [] not balanced +select * from test_regex('a[b', '', 'b'); +ERROR: invalid regular expression: brackets [] not balanced +-- expectError 9.25 & {a[b-} EBRACK +select * from test_regex('a[b-', '', ''); +ERROR: invalid regular expression: brackets [] not balanced +select * from test_regex('a[b-', '', 'b'); +ERROR: invalid regular expression: brackets [] not balanced +-- expectError 9.26 & {a[b-c} EBRACK +select * from test_regex('a[b-c', '', ''); +ERROR: invalid regular expression: brackets [] not balanced +select * from test_regex('a[b-c', '', 'b'); +ERROR: invalid regular expression: brackets [] not balanced +-- expectMatch 9.27 &M {a[b-c]} ab ab +select * from test_regex('a[b-c]', 'ab', 'M'); + test_regex +----------------- + {0,REG_UUNPORT} + {ab} +(2 rows) + +select * from test_regex('a[b-c]', 'ab', 'Mb'); + test_regex +----------------- + {0,REG_UUNPORT} + {ab} +(2 rows) + +-- expectMatch 9.28 & {a[b-b]} ab ab +select * from test_regex('a[b-b]', 'ab', ''); + test_regex +------------ + {0} + {ab} +(2 rows) + +select * from test_regex('a[b-b]', 'ab', 'b'); + test_regex +------------ + {0} + {ab} +(2 rows) + +-- expectMatch 9.29 &M {a[1-2]} a2 a2 +select * from test_regex('a[1-2]', 'a2', 'M'); + test_regex +----------------- + {0,REG_UUNPORT} + {a2} +(2 rows) + +select * from test_regex('a[1-2]', 'a2', 'Mb'); + 
test_regex +----------------- + {0,REG_UUNPORT} + {a2} +(2 rows) + +-- expectError 9.30 & {a[c-b]} ERANGE +select * from test_regex('a[c-b]', '', ''); +ERROR: invalid regular expression: invalid character range +select * from test_regex('a[c-b]', '', 'b'); +ERROR: invalid regular expression: invalid character range +-- expectError 9.31 & {a[a-b-c]} ERANGE +select * from test_regex('a[a-b-c]', '', ''); +ERROR: invalid regular expression: invalid character range +select * from test_regex('a[a-b-c]', '', 'b'); +ERROR: invalid regular expression: invalid character range +-- expectMatch 9.32 &M {a[--?]b} a?b a?b +select * from test_regex('a[--?]b', 'a?b', 'M'); + test_regex +----------------- + {0,REG_UUNPORT} + {a?b} +(2 rows) + +select * from test_regex('a[--?]b', 'a?b', 'Mb'); + test_regex +----------------- + {0,REG_UUNPORT} + {a?b} +(2 rows) + +-- expectMatch 9.33 & {a[---]b} a-b a-b +select * from test_regex('a[---]b', 'a-b', ''); + test_regex +------------ + {0} + {a-b} +(2 rows) + +select * from test_regex('a[---]b', 'a-b', 'b'); + test_regex +------------ + {0} + {a-b} +(2 rows) + +-- expectMatch 9.34 & {a[]b]c} a]c a]c +select * from test_regex('a[]b]c', 'a]c', ''); + test_regex +------------ + {0} + {a]c} +(2 rows) + +select * from test_regex('a[]b]c', 'a]c', 'b'); + test_regex +------------ + {0} + {a]c} +(2 rows) + +-- expectMatch 9.35 EP {a[\]]b} a]b a]b +select * from test_regex('a[\]]b', 'a]b', 'EP'); + test_regex +---------------------------- + {0,REG_UBBS,REG_UNONPOSIX} + {a]b} +(2 rows) + +-- expectNomatch 9.36 bE {a[\]]b} a]b +select * from test_regex('a[\]]b', 'a]b', 'bE'); + test_regex +-------------- + {0,REG_UBBS} +(1 row) + +-- expectMatch 9.37 bE {a[\]]b} "a\\]b" "a\\]b" +select * from test_regex('a[\]]b', 'a\]b', 'bE'); + test_regex +-------------- + {0,REG_UBBS} + {"a\\]b"} +(2 rows) + +-- expectMatch 9.38 eE {a[\]]b} "a\\]b" "a\\]b" +select * from test_regex('a[\]]b', 'a\]b', 'eE'); + test_regex +-------------- + {0,REG_UBBS} + {"a\\]b"} +(2 
rows) + +-- expectMatch 9.39 EP {a[\\]b} "a\\b" "a\\b" +select * from test_regex('a[\\]b', 'a\b', 'EP'); + test_regex +---------------------------- + {0,REG_UBBS,REG_UNONPOSIX} + {"a\\b"} +(2 rows) + +-- expectMatch 9.40 eE {a[\\]b} "a\\b" "a\\b" +select * from test_regex('a[\\]b', 'a\b', 'eE'); + test_regex +-------------- + {0,REG_UBBS} + {"a\\b"} +(2 rows) + +-- expectMatch 9.41 bE {a[\\]b} "a\\b" "a\\b" +select * from test_regex('a[\\]b', 'a\b', 'bE'); + test_regex +-------------- + {0,REG_UBBS} + {"a\\b"} +(2 rows) + +-- expectError 9.42 - {a[\Z]b} EESCAPE +select * from test_regex('a[\Z]b', '', '-'); +ERROR: invalid regular expression: invalid escape \ sequence +-- expectMatch 9.43 & {a[[b]c} "a\[c" "a\[c" +select * from test_regex('a[[b]c', 'a[c', ''); + test_regex +------------ + {0} + {a[c} +(2 rows) + +select * from test_regex('a[[b]c', 'a[c', 'b'); + test_regex +------------ + {0} + {a[c} +(2 rows) + +-- This only works in UTF8 encoding, so it's moved to test_regex_utf8.sql: +-- expectMatch 9.44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \ +-- "a\u0102\u02ffb" "a\u0102\u02ffb" +-- doing 10 "anchors and newlines" +-- expectMatch 10.1 & ^a a a +select * from test_regex('^a', 'a', ''); + test_regex +------------ + {0} + {a} +(2 rows) + +select * from test_regex('^a', 'a', 'b'); + test_regex +------------ + {0} + {a} +(2 rows) + +-- expectNomatch 10.2 &^ ^a a +select * from test_regex('^a', 'a', '^'); + test_regex +------------ + {0} +(1 row) + +select * from test_regex('^a', 'a', '^b'); + test_regex +------------ + {0} +(1 row) + +-- expectIndices 10.3 &N ^ a {0 -1} +select * from test_regex('^', 'a', '0N'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {"0 -1"} +(2 rows) + +select * from test_regex('^', 'a', '0Nb'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {"0 -1"} +(2 rows) + +-- expectIndices 10.4 & {a$} aba {2 2} +select * from test_regex('a$', 'aba', '0'); + test_regex +------------ + {0} + {"2 2"} +(2 rows) + +select * 
from test_regex('a$', 'aba', '0b'); + test_regex +------------ + {0} + {"2 2"} +(2 rows) + +-- expectNomatch 10.5 {&$} {a$} a +select * from test_regex('a$', 'a', '$'); + test_regex +------------ + {0} +(1 row) + +select * from test_regex('a$', 'a', '$b'); + test_regex +------------ + {0} +(1 row) + +-- expectIndices 10.6 &N {$} ab {2 1} +select * from test_regex('$', 'ab', '0N'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {"2 1"} +(2 rows) + +select * from test_regex('$', 'ab', '0Nb'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {"2 1"} +(2 rows) + +-- expectMatch 10.7 &n ^a a a +select * from test_regex('^a', 'a', 'n'); + test_regex +------------ + {0} + {a} +(2 rows) + +select * from test_regex('^a', 'a', 'nb'); + test_regex +------------ + {0} + {a} +(2 rows) + +-- expectMatch 10.8 &n "^a" "b\na" "a" +select * from test_regex('^a', E'b\na', 'n'); + test_regex +------------ + {0} + {a} +(2 rows) + +select * from test_regex('^a', E'b\na', 'nb'); + test_regex +------------ + {0} + {a} +(2 rows) + +-- expectIndices 10.9 &w "^a" "a\na" {0 0} +select * from test_regex('^a', E'a\na', '0w'); + test_regex +------------ + {0} + {"0 0"} +(2 rows) + +select * from test_regex('^a', E'a\na', '0wb'); + test_regex +------------ + {0} + {"0 0"} +(2 rows) + +-- expectIndices 10.10 &n^ "^a" "a\na" {2 2} +select * from test_regex('^a', E'a\na', '0n^'); + test_regex +------------ + {0} + {"2 2"} +(2 rows) + +select * from test_regex('^a', E'a\na', '0n^b'); + test_regex +------------ + {0} + {"2 2"} +(2 rows) + +-- expectMatch 10.11 &n {a$} a a +select * from test_regex('a$', 'a', 'n'); + test_regex +------------ + {0} + {a} +(2 rows) + +select * from test_regex('a$', 'a', 'nb'); + test_regex +------------ + {0} + {a} +(2 rows) + +-- expectMatch 10.12 &n "a\$" "a\nb" "a" +select * from test_regex('a$', E'a\nb', 'n'); + test_regex +------------ + {0} + {a} +(2 rows) + +select * from test_regex('a$', E'a\nb', 'nb'); + test_regex +------------ + {0} + 
{a} +(2 rows) + +-- expectIndices 10.13 &n "a\$" "a\na" {0 0} +select * from test_regex('a$', E'a\na', '0n'); + test_regex +------------ + {0} + {"0 0"} +(2 rows) + +select * from test_regex('a$', E'a\na', '0nb'); + test_regex +------------ + {0} + {"0 0"} +(2 rows) + +-- expectIndices 10.14 N ^^ a {0 -1} +select * from test_regex('^^', 'a', '0N'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {"0 -1"} +(2 rows) + +-- expectMatch 10.15 b ^^ ^ ^ +select * from test_regex('^^', '^', 'b'); + test_regex +------------ + {0} + {^} +(2 rows) + +-- expectIndices 10.16 N {$$} a {1 0} +select * from test_regex('$$', 'a', '0N'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {"1 0"} +(2 rows) + +-- expectMatch 10.17 b {$$} "\$" "\$" +select * from test_regex('$$', '$', 'b'); + test_regex +------------ + {0} + {$} +(2 rows) + +-- expectMatch 10.18 &N {^$} "" "" +select * from test_regex('^$', '', 'N'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {""} +(2 rows) + +select * from test_regex('^$', '', 'Nb'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {""} +(2 rows) + +-- expectNomatch 10.19 &N {^$} a +select * from test_regex('^$', 'a', 'N'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} +(1 row) + +select * from test_regex('^$', 'a', 'Nb'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} +(1 row) + +-- expectIndices 10.20 &nN "^\$" a\n\nb {2 1} +select * from test_regex('^$', E'a\n\nb', '0nN'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {"2 1"} +(2 rows) + +select * from test_regex('^$', E'a\n\nb', '0nNb'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {"2 1"} +(2 rows) + +-- expectMatch 10.21 N {$^} "" "" +select * from test_regex('$^', '', 'N'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {""} +(2 rows) + +-- expectMatch 10.22 b {$^} "\$^" "\$^" +select * from test_regex('$^', '$^', 'b'); + test_regex +------------ + {0} + {$^} +(2 rows) + +-- 
expectMatch 10.23 P {\Aa} a a +select * from test_regex('\Aa', 'a', 'P'); + test_regex +------------------- + {0,REG_UNONPOSIX} + {a} +(2 rows) + +-- expectMatch 10.24 ^P {\Aa} a a +select * from test_regex('\Aa', 'a', '^P'); + test_regex +------------------- + {0,REG_UNONPOSIX} + {a} +(2 rows) + +-- expectNomatch 10.25 ^nP {\Aa} "b\na" +select * from test_regex('\Aa', E'b\na', '^nP'); + test_regex +------------------- + {0,REG_UNONPOSIX} +(1 row) + +-- expectMatch 10.26 P {a\Z} a a +select * from test_regex('a\Z', 'a', 'P'); + test_regex +------------------- + {0,REG_UNONPOSIX} + {a} +(2 rows) + +-- expectMatch 10.27 \$P {a\Z} a a +select * from test_regex('a\Z', 'a', '$P'); + test_regex +------------------- + {0,REG_UNONPOSIX} + {a} +(2 rows) + +-- expectNomatch 10.28 \$nP {a\Z} "a\nb" +select * from test_regex('a\Z', E'a\nb', '$nP'); + test_regex +------------------- + {0,REG_UNONPOSIX} +(1 row) + +-- expectError 10.29 - ^* BADRPT +select * from test_regex('^*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 10.30 - {$*} BADRPT +select * from test_regex('$*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 10.31 - {\A*} BADRPT +select * from test_regex('\A*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 10.32 - {\Z*} BADRPT +select * from test_regex('\Z*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- doing 11 "boundary constraints" +-- expectMatch 11.1 &LP {[[:<:]]a} a a +select * from test_regex('[[:<:]]a', 'a', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +select * from test_regex('[[:<:]]a', 'a', 'LPb'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectMatch 11.2 &LP {[[:<:]]a} -a a +select * from test_regex('[[:<:]]a', '-a', 'LP'); + test_regex +------------------------------- + 
{0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +select * from test_regex('[[:<:]]a', '-a', 'LPb'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectNomatch 11.3 &LP {[[:<:]]a} ba +select * from test_regex('[[:<:]]a', 'ba', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +select * from test_regex('[[:<:]]a', 'ba', 'LPb'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectMatch 11.4 &LP {a[[:>:]]} a a +select * from test_regex('a[[:>:]]', 'a', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +select * from test_regex('a[[:>:]]', 'a', 'LPb'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectMatch 11.5 &LP {a[[:>:]]} a- a +select * from test_regex('a[[:>:]]', 'a-', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +select * from test_regex('a[[:>:]]', 'a-', 'LPb'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectNomatch 11.6 &LP {a[[:>:]]} ab +select * from test_regex('a[[:>:]]', 'ab', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +select * from test_regex('a[[:>:]]', 'ab', 'LPb'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectMatch 11.7 bLP {\} a a +select * from test_regex('a\>', 'a', 'bLP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectNomatch 11.10 bLP {a\>} ab +select * from test_regex('a\>', 'ab', 'bLP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectMatch 11.11 LP {\ya} a a +select * from test_regex('\ya', 'a', 'LP'); + test_regex +------------------------------- + 
{0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectNomatch 11.12 LP {\ya} ba +select * from test_regex('\ya', 'ba', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectMatch 11.13 LP {a\y} a a +select * from test_regex('a\y', 'a', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectNomatch 11.14 LP {a\y} ab +select * from test_regex('a\y', 'ab', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectMatch 11.15 LP {a\Y} ab a +select * from test_regex('a\Y', 'ab', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectNomatch 11.16 LP {a\Y} a- +select * from test_regex('a\Y', 'a-', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectNomatch 11.17 LP {a\Y} a +select * from test_regex('a\Y', 'a', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectNomatch 11.18 LP {-\Y} -a +select * from test_regex('-\Y', '-a', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectMatch 11.19 LP {-\Y} -% - +select * from test_regex('-\Y', '-%', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {-} +(2 rows) + +-- expectNomatch 11.20 LP {\Y-} a- +select * from test_regex('\Y-', 'a-', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectError 11.21 - {[[:<:]]*} BADRPT +select * from test_regex('[[:<:]]*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 11.22 - {[[:>:]]*} BADRPT +select * from test_regex('[[:>:]]*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 11.23 b {\<*} BADRPT +select * from test_regex('\<*', '', 'b'); +ERROR: 
invalid regular expression: quantifier operand invalid +-- expectError 11.24 b {\>*} BADRPT +select * from test_regex('\>*', '', 'b'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 11.25 - {\y*} BADRPT +select * from test_regex('\y*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectError 11.26 - {\Y*} BADRPT +select * from test_regex('\Y*', '', '-'); +ERROR: invalid regular expression: quantifier operand invalid +-- expectMatch 11.27 LP {\ma} a a +select * from test_regex('\ma', 'a', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectNomatch 11.28 LP {\ma} ba +select * from test_regex('\ma', 'ba', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectMatch 11.29 LP {a\M} a a +select * from test_regex('a\M', 'a', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +-- expectNomatch 11.30 LP {a\M} ab +select * from test_regex('a\M', 'ab', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectNomatch 11.31 ILP {\Ma} a +select * from test_regex('\Ma', 'a', 'ILP'); + test_regex +----------------------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE,REG_UIMPOSSIBLE} +(1 row) + +-- expectNomatch 11.32 ILP {a\m} a +select * from test_regex('a\m', 'a', 'ILP'); + test_regex +----------------------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE,REG_UIMPOSSIBLE} +(1 row) + +-- doing 12 "character classes" +-- expectMatch 12.1 LP {a\db} a0b a0b +select * from test_regex('a\db', 'a0b', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a0b} +(2 rows) + +-- expectNomatch 12.2 LP {a\db} axb +select * from test_regex('a\db', 'axb', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectNomatch 12.3 
LP {a\Db} a0b +select * from test_regex('a\Db', 'a0b', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectMatch 12.4 LP {a\Db} axb axb +select * from test_regex('a\Db', 'axb', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {axb} +(2 rows) + +-- expectMatch 12.5 LP "a\\sb" "a b" "a b" +select * from test_regex('a\sb', 'a b', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {"a b"} +(2 rows) + +-- expectMatch 12.6 LP "a\\sb" "a\tb" "a\tb" +select * from test_regex('a\sb', E'a\tb', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {"a b"} +(2 rows) + +-- expectMatch 12.7 LP "a\\sb" "a\nb" "a\nb" +select * from test_regex('a\sb', E'a\nb', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {"a + + b"} +(2 rows) + +-- expectNomatch 12.8 LP {a\sb} axb +select * from test_regex('a\sb', 'axb', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectMatch 12.9 LP {a\Sb} axb axb +select * from test_regex('a\Sb', 'axb', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {axb} +(2 rows) + +-- expectNomatch 12.10 LP "a\\Sb" "a b" +select * from test_regex('a\Sb', 'a b', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectMatch 12.11 LP {a\wb} axb axb +select * from test_regex('a\wb', 'axb', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {axb} +(2 rows) + +-- expectNomatch 12.12 LP {a\wb} a-b +select * from test_regex('a\wb', 'a-b', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- expectNomatch 12.13 LP {a\Wb} axb +select * from test_regex('a\Wb', 'axb', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + 
+-- expectMatch 12.14 LP {a\Wb} a-b a-b +select * from test_regex('a\Wb', 'a-b', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {a-b} +(2 rows) + +-- expectMatch 12.15 LP {\y\w+z\y} adze-guz guz +select * from test_regex('\y\w+z\y', 'adze-guz', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {guz} +(2 rows) + +-- expectMatch 12.16 LPE {a[\d]b} a1b a1b +select * from test_regex('a[\d]b', 'a1b', 'LPE'); + test_regex +---------------------------------------- + {0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE} + {a1b} +(2 rows) + +-- expectMatch 12.17 LPE "a\[\\s]b" "a b" "a b" +select * from test_regex('a[\s]b', 'a b', 'LPE'); + test_regex +---------------------------------------- + {0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE} + {"a b"} +(2 rows) + +-- expectMatch 12.18 LPE {a[\w]b} axb axb +select * from test_regex('a[\w]b', 'axb', 'LPE'); + test_regex +---------------------------------------- + {0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE} + {axb} +(2 rows) + +-- doing 13 "escapes" +-- expectError 13.1 & "a\\" EESCAPE +select * from test_regex('a\', '', ''); +ERROR: invalid regular expression: invalid escape \ sequence +select * from test_regex('a\', '', 'b'); +ERROR: invalid regular expression: invalid escape \ sequence +-- expectMatch 13.2 - {a\]+)>} a +-- } 1 +select * from test_regex('\A\s*[^<]*\s*<([^>]+)>', 'a', 'LP'); + test_regex +------------------------------- + {1,REG_UNONPOSIX,REG_ULOCALE} + {a,a} +(2 rows) + +-- test reg-33.4 {Bug 505048} { +-- regexp {\A\s*([^b]*)b} ab +-- } 1 +select * from test_regex('\A\s*([^b]*)b', 'ab', 'LP'); + test_regex +------------------------------- + {1,REG_UNONPOSIX,REG_ULOCALE} + {ab,a} +(2 rows) + +-- test reg-33.5 {Bug 505048} { +-- regexp {\A\s*[^b]*(b)} ab +-- } 1 +select * from test_regex('\A\s*[^b]*(b)', 'ab', 'LP'); + test_regex +------------------------------- + {1,REG_UNONPOSIX,REG_ULOCALE} + {ab,b} +(2 rows) + +-- test reg-33.6 {Bug 505048} { +-- regexp 
{\A(\s*)[^b]*(b)} ab +-- } 1 +select * from test_regex('\A(\s*)[^b]*(b)', 'ab', 'LP'); + test_regex +------------------------------- + {2,REG_UNONPOSIX,REG_ULOCALE} + {ab,"",b} +(2 rows) + +-- test reg-33.7 {Bug 505048} { +-- regexp {\A\s*[^b]*b} ab +-- } 1 +select * from test_regex('\A\s*[^b]*b', 'ab', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {ab} +(2 rows) + +-- test reg-33.8 {Bug 505048} { +-- regexp -inline {\A\s*[^b]*b} ab +-- } ab +select * from test_regex('\A\s*[^b]*b', 'ab', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {ab} +(2 rows) + +-- test reg-33.9 {Bug 505048} { +-- regexp -indices -inline {\A\s*[^b]*b} ab +-- } {{0 1}} +select * from test_regex('\A\s*[^b]*b', 'ab', '0LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {"0 1"} +(2 rows) + +-- test reg-33.10 {Bug 840258} -body { +-- regsub {(^|\n)+\.*b} \n.b {} tmp +-- } -cleanup { +-- unset tmp +-- } -result 1 +select * from test_regex('(^|\n)+\.*b', E'\n.b', 'P'); + test_regex +------------------- + {1,REG_UNONPOSIX} + {" + + .b"," + + "} +(2 rows) + +-- test reg-33.11 {Bug 840258} -body { +-- regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \ +-- "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp +-- } -cleanup { +-- unset tmp +-- } -result 1 +select * from test_regex('(^|[\n\r]+)\.*\?<.*?(\n|\r)+', E'TQ\r\n.?<5000267>Test already stopped\r\n', 'EP'); + test_regex +----------------------------------- + {2,REG_UBBS,REG_UNONPOSIX} + {"\r + + .?<5000267>Test already stopped\r+ + ","\r + + "," + + "} +(2 rows) + +-- test reg-33.12 {Bug 1810264 - bad read} { +-- regexp {\3161573148} {\3161573148} +-- } 0 +select * from test_regex('\3161573148', '\3161573148', 'MP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_UUNPORT} +(1 row) + +-- test reg-33.13 {Bug 1810264 - infinite loop} { +-- regexp {($|^)*} {x} +-- } 1 +select * from test_regex('($|^)*', 'x', 'N'); + test_regex 
+--------------------- + {1,REG_UEMPTYMATCH} + {"",""} +(2 rows) + +-- # Some environments have small default stack sizes. [Bug 1905562] +-- test reg-33.14 {Bug 1810264 - super-expensive expression} nonPortable { +-- regexp {(x{200}){200}$y} {x} +-- } 0 +-- This might or might not work depending on platform, so skip it +-- select * from test_regex('(x{200}){200}$y', 'x', 'IQ'); +-- test reg-33.15.1 {Bug 3603557 - an "in the wild" RE} { +-- lindex [regexp -expanded -about { +-- ^TETRA_MODE_CMD # Message Type +-- ([[:blank:]]+) # Pad +-- (ETS_1_1|ETS_1_2|ETS_2_2) # SystemCode +-- ([[:blank:]]+) # Pad +-- (CONTINUOUS|CARRIER|MCCH|TRAFFIC) # SharingMode +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,2}) # ColourCode +-- ([[:blank:]]+) # Pad +-- (1|2|3|4|6|9|12|18) # TSReservedFrames +-- ([[:blank:]]+) # Pad +-- (PASS|TRUE|FAIL|FALSE) # UPlaneDTX +-- ([[:blank:]]+) # Pad +-- (PASS|TRUE|FAIL|FALSE) # Frame18Extension +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,4}) # MCC +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,5}) # MNC +-- ([[:blank:]]+) # Pad +-- (BOTH|BCAST|ENQRY|NONE) # NbrCellBcast +-- ([[:blank:]]+) # Pad +-- (UNKNOWN|LOW|MEDIUM|HIGH) # CellServiceLevel +-- ([[:blank:]]+) # Pad +-- (PASS|TRUE|FAIL|FALSE) # LateEntryInfo +-- ([[:blank:]]+) # Pad +-- (300|400) # FrequencyBand +-- ([[:blank:]]+) # Pad +-- (NORMAL|REVERSE) # ReverseOperation +-- ([[:blank:]]+) # Pad +-- (NONE|\+6\.25|\-6\.25|\+12\.5) # Offset +-- ([[:blank:]]+) # Pad +-- (10) # DuplexSpacing +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,4}) # MainCarrierNr +-- ([[:blank:]]+) # Pad +-- (0|1|2|3) # NrCSCCH +-- ([[:blank:]]+) # Pad +-- (15|20|25|30|35|40|45) # MSTxPwrMax +-- ([[:blank:]]+) # Pad +-- (\-125|\-120|\-115|\-110|\-105|\-100|\-95|\-90|\-85|\-80|\-75|\-70|\-65|\-60|\-55|\-50) +-- # RxLevAccessMin +-- ([[:blank:]]+) # Pad +-- (\-53|\-51|\-49|\-47|\-45|\-43|\-41|\-39|\-37|\-35|\-33|\-31|\-29|\-27|\-25|\-23) +-- # AccessParameter +-- ([[:blank:]]+) # Pad +-- (DISABLE|[[:digit:]]{3,4}) # 
RadioDLTimeout +-- ([[:blank:]]+) # Pad +-- (\-[[:digit:]]{2,3}) # RSSIThreshold +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,5}) # CCKIdSCKVerNr +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,5}) # LocationArea +-- ([[:blank:]]+) # Pad +-- ([(1|0)]{16}) # SubscriberClass +-- ([[:blank:]]+) # Pad +-- ([(1|0)]{12}) # BSServiceDetails +-- ([[:blank:]]+) # Pad +-- (RANDOMIZE|IMMEDIATE|[[:digit:]]{1,2}) # IMM +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,2}) # WT +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,2}) # Nu +-- ([[:blank:]]+) # Pad +-- ([0-1]) # FrameLngFctr +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,2}) # TSPtr +-- ([[:blank:]]+) # Pad +-- ([0-7]) # MinPriority +-- ([[:blank:]]+) # Pad +-- (PASS|TRUE|FAIL|FALSE) # ExtdSrvcsEnabled +-- ([[:blank:]]+) # Pad +-- (.*) # ConditionalFields +-- }] 0 +-- } 68 +select * from test_regex($$ + ^TETRA_MODE_CMD # Message Type + ([[:blank:]]+) # Pad + (ETS_1_1|ETS_1_2|ETS_2_2) # SystemCode + ([[:blank:]]+) # Pad + (CONTINUOUS|CARRIER|MCCH|TRAFFIC) # SharingMode + ([[:blank:]]+) # Pad + ([[:digit:]]{1,2}) # ColourCode + ([[:blank:]]+) # Pad + (1|2|3|4|6|9|12|18) # TSReservedFrames + ([[:blank:]]+) # Pad + (PASS|TRUE|FAIL|FALSE) # UPlaneDTX + ([[:blank:]]+) # Pad + (PASS|TRUE|FAIL|FALSE) # Frame18Extension + ([[:blank:]]+) # Pad + ([[:digit:]]{1,4}) # MCC + ([[:blank:]]+) # Pad + ([[:digit:]]{1,5}) # MNC + ([[:blank:]]+) # Pad + (BOTH|BCAST|ENQRY|NONE) # NbrCellBcast + ([[:blank:]]+) # Pad + (UNKNOWN|LOW|MEDIUM|HIGH) # CellServiceLevel + ([[:blank:]]+) # Pad + (PASS|TRUE|FAIL|FALSE) # LateEntryInfo + ([[:blank:]]+) # Pad + (300|400) # FrequencyBand + ([[:blank:]]+) # Pad + (NORMAL|REVERSE) # ReverseOperation + ([[:blank:]]+) # Pad + (NONE|\+6\.25|\-6\.25|\+12\.5) # Offset + ([[:blank:]]+) # Pad + (10) # DuplexSpacing + ([[:blank:]]+) # Pad + ([[:digit:]]{1,4}) # MainCarrierNr + ([[:blank:]]+) # Pad + (0|1|2|3) # NrCSCCH + ([[:blank:]]+) # Pad + (15|20|25|30|35|40|45) # MSTxPwrMax + ([[:blank:]]+) # Pad + 
(\-125|\-120|\-115|\-110|\-105|\-100|\-95|\-90|\-85|\-80|\-75|\-70|\-65|\-60|\-55|\-50) + # RxLevAccessMin + ([[:blank:]]+) # Pad + (\-53|\-51|\-49|\-47|\-45|\-43|\-41|\-39|\-37|\-35|\-33|\-31|\-29|\-27|\-25|\-23) + # AccessParameter + ([[:blank:]]+) # Pad + (DISABLE|[[:digit:]]{3,4}) # RadioDLTimeout + ([[:blank:]]+) # Pad + (\-[[:digit:]]{2,3}) # RSSIThreshold + ([[:blank:]]+) # Pad + ([[:digit:]]{1,5}) # CCKIdSCKVerNr + ([[:blank:]]+) # Pad + ([[:digit:]]{1,5}) # LocationArea + ([[:blank:]]+) # Pad + ([(1|0)]{16}) # SubscriberClass + ([[:blank:]]+) # Pad + ([(1|0)]{12}) # BSServiceDetails + ([[:blank:]]+) # Pad + (RANDOMIZE|IMMEDIATE|[[:digit:]]{1,2}) # IMM + ([[:blank:]]+) # Pad + ([[:digit:]]{1,2}) # WT + ([[:blank:]]+) # Pad + ([[:digit:]]{1,2}) # Nu + ([[:blank:]]+) # Pad + ([0-1]) # FrameLngFctr + ([[:blank:]]+) # Pad + ([[:digit:]]{1,2}) # TSPtr + ([[:blank:]]+) # Pad + ([0-7]) # MinPriority + ([[:blank:]]+) # Pad + (PASS|TRUE|FAIL|FALSE) # ExtdSrvcsEnabled + ([[:blank:]]+) # Pad + (.*) # ConditionalFields + $$, '', 'xLMPQ'); + test_regex +-------------------------------------------------------- + {68,REG_UBOUNDS,REG_UNONPOSIX,REG_UUNPORT,REG_ULOCALE} +(1 row) + +-- test reg-33.16.1 {Bug [8d2c0da36d]- another "in the wild" RE} { +-- lindex [regexp -about "^MRK:client1: =1339 14HKelly Talisman 10011000 (\[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]*) \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 8 0 8 0 0 0 77 77 1 1 2 0 11 { 1 3 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 13HC6 My Creator 2 3 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 31HC7 Slightly offensive name, huh 3 8 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 23HE-mail:kelly@hotbox.com 4 9 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 
17Hcompface must die 5 10 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 0 3HAir 6 12 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 14HPGP public key 7 13 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 16Hkelly@hotbox.com 8 30 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 0 12H2 text/plain 9 30 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 0 13H2 x-kom/basic 10 33 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 1H0 11 14 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 1H3 }\r?"] 0 +-- } 1 +select * from test_regex(E'^MRK:client1: =1339 14HKelly Talisman 10011000 ([0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]*) [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 8 0 8 0 0 0 77 77 1 1 2 0 11 { 1 3 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 13HC6 My Creator 2 3 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 31HC7 Slightly offensive name, huh 3 8 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 23HE-mail:kelly@hotbox.com 4 9 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 17Hcompface must die 5 10 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 0 3HAir 6 12 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 14HPGP public key 7 13 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 16Hkelly@hotbox.com 8 30 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 0 12H2 text/plain 9 30 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 0 13H2 x-kom/basic 10 33 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 
[0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 1H0 11 14 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 1H3 }\r?', '', 'BMS'); + test_regex +----------------------------------------- + {1,REG_UBRACES,REG_UUNSPEC,REG_UUNPORT} +(1 row) + +-- test reg-33.15 {constraint fixes} { +-- regexp {(^)+^} x +-- } 1 +select * from test_regex('(^)+^', 'x', 'N'); + test_regex +--------------------- + {1,REG_UEMPTYMATCH} + {"",""} +(2 rows) + +-- test reg-33.16 {constraint fixes} { +-- regexp {($^)+} x +-- } 0 +select * from test_regex('($^)+', 'x', 'N'); + test_regex +--------------------- + {1,REG_UEMPTYMATCH} +(1 row) + +-- test reg-33.17 {constraint fixes} { +-- regexp {(^$)*} x +-- } 1 +select * from test_regex('(^$)*', 'x', 'N'); + test_regex +--------------------- + {1,REG_UEMPTYMATCH} + {"",NULL} +(2 rows) + +-- test reg-33.18 {constraint fixes} { +-- regexp {(^(?!aa))+} {aa bb cc} +-- } 0 +select * from test_regex('(^(?!aa))+', 'aa bb cc', 'HP'); + test_regex +----------------------------------- + {1,REG_ULOOKAROUND,REG_UNONPOSIX} +(1 row) + +-- test reg-33.19 {constraint fixes} { +-- regexp {(^(?!aa)(?!bb)(?!cc))+} {aa x} +-- } 0 +select * from test_regex('(^(?!aa)(?!bb)(?!cc))+', 'aa x', 'HP'); + test_regex +----------------------------------- + {1,REG_ULOOKAROUND,REG_UNONPOSIX} +(1 row) + +-- test reg-33.20 {constraint fixes} { +-- regexp {(^(?!aa)(?!bb)(?!cc))+} {bb x} +-- } 0 +select * from test_regex('(^(?!aa)(?!bb)(?!cc))+', 'bb x', 'HP'); + test_regex +----------------------------------- + {1,REG_ULOOKAROUND,REG_UNONPOSIX} +(1 row) + +-- test reg-33.21 {constraint fixes} { +-- regexp {(^(?!aa)(?!bb)(?!cc))+} {cc x} +-- } 0 +select * from test_regex('(^(?!aa)(?!bb)(?!cc))+', 'cc x', 'HP'); + test_regex +----------------------------------- + {1,REG_ULOOKAROUND,REG_UNONPOSIX} +(1 row) + +-- test reg-33.22 {constraint fixes} { +-- regexp {(^(?!aa)(?!bb)(?!cc))+} {dd x} +-- } 1 +select * from test_regex('(^(?!aa)(?!bb)(?!cc))+', 'dd x', 'HP'); + 
test_regex +----------------------------------- + {1,REG_ULOOKAROUND,REG_UNONPOSIX} + {"",""} +(2 rows) + +-- test reg-33.23 {} { +-- regexp {abcd(\m)+xyz} x +-- } 0 +select * from test_regex('abcd(\m)+xyz', 'x', 'ILP'); + test_regex +----------------------------------------------- + {1,REG_UNONPOSIX,REG_ULOCALE,REG_UIMPOSSIBLE} +(1 row) + +-- test reg-33.24 {} { +-- regexp {abcd(\m)+xyz} a +-- } 0 +select * from test_regex('abcd(\m)+xyz', 'a', 'ILP'); + test_regex +----------------------------------------------- + {1,REG_UNONPOSIX,REG_ULOCALE,REG_UIMPOSSIBLE} +(1 row) + +-- test reg-33.25 {} { +-- regexp {^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)} x +-- } 0 +select * from test_regex('^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)', 'x', 'S'); + test_regex +----------------- + {7,REG_UUNSPEC} +(1 row) + +-- test reg-33.26 {} { +-- regexp {a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$} x +-- } 0 +select * from test_regex('a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$', 'x', 'IS'); + test_regex +--------------------------------- + {7,REG_UUNSPEC,REG_UIMPOSSIBLE} +(1 row) + +-- test reg-33.27 {} { +-- regexp {xyz(\Y\Y)+} x +-- } 0 +select * from test_regex('xyz(\Y\Y)+', 'x', 'LP'); + test_regex +------------------------------- + {1,REG_UNONPOSIX,REG_ULOCALE} +(1 row) + +-- test reg-33.28 {} { +-- regexp {x|(?:\M)+} x +-- } 1 +select * from test_regex('x|(?:\M)+', 'x', 'LNP'); + test_regex +----------------------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE,REG_UEMPTYMATCH} + {x} +(2 rows) + +-- test reg-33.29 {} { +-- # This is near the limits of the RE engine +-- regexp [string repeat x*y*z* 480] x +-- } 1 +-- The runtime cost of this seems out of proportion to the value, +-- so for Postgres purposes reduce the repeat to 200x +select * from test_regex(repeat('x*y*z*', 200), 'x', 'N'); + test_regex +--------------------- + {0,REG_UEMPTYMATCH} + {x} +(2 rows) + +-- test reg-33.30 {Bug 1080042} { +-- regexp {(\Y)+} foo +-- } 1 +select * from test_regex('(\Y)+', 'foo', 
'LNP'); + test_regex +----------------------------------------------- + {1,REG_UNONPOSIX,REG_ULOCALE,REG_UEMPTYMATCH} + {"",""} +(2 rows) + diff --git a/src/test/modules/test_regex/expected/test_regex_utf8.out b/src/test/modules/test_regex/expected/test_regex_utf8.out new file mode 100644 index 0000000000..112698ac61 --- /dev/null +++ b/src/test/modules/test_regex/expected/test_regex_utf8.out @@ -0,0 +1,100 @@ +/* + * This test must be run in a database with UTF-8 encoding, + * because other encodings don't support all the characters used. + */ +SELECT getdatabaseencoding() <> 'UTF8' + AS skip_test \gset +\if :skip_test +\quit +\endif +set client_encoding = utf8; +set standard_conforming_strings = on; +-- Run the Tcl test cases that require Unicode +-- expectMatch 9.44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \ +-- "a\u0102\u02ffb" "a\u0102\u02ffb" +select * from test_regex('a[\u00fe-\u0507][\u00ff-\u0300]b', E'a\u0102\u02ffb', 'EMP*'); + test_regex +---------------------------------------- + {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT} + {aĂ˿b} +(2 rows) + +-- expectMatch 13.27 P "a\\U00001234x" "a\u1234x" "a\u1234x" +select * from test_regex('a\U00001234x', E'a\u1234x', 'P'); + test_regex +------------------- + {0,REG_UNONPOSIX} + {aሴx} +(2 rows) + +-- expectMatch 13.28 P {a\U00001234x} "a\u1234x" "a\u1234x" +select * from test_regex('a\U00001234x', E'a\u1234x', 'P'); + test_regex +------------------- + {0,REG_UNONPOSIX} + {aሴx} +(2 rows) + +-- expectMatch 13.29 P "a\\U0001234x" "a\u1234x" "a\u1234x" +-- Tcl has relaxed their code to allow 1-8 hex digits, but Postgres hasn't +select * from test_regex('a\U0001234x', E'a\u1234x', 'P'); +ERROR: invalid regular expression: invalid escape \ sequence +-- expectMatch 13.30 P {a\U0001234x} "a\u1234x" "a\u1234x" +-- Tcl has relaxed their code to allow 1-8 hex digits, but Postgres hasn't +select * from test_regex('a\U0001234x', E'a\u1234x', 'P'); +ERROR: invalid regular expression: invalid escape \ sequence +-- expectMatch 13.31 
P "a\\U000012345x" "a\u12345x" "a\u12345x" +select * from test_regex('a\U000012345x', E'a\u12345x', 'P'); + test_regex +------------------- + {0,REG_UNONPOSIX} + {aሴ5x} +(2 rows) + +-- expectMatch 13.32 P {a\U000012345x} "a\u12345x" "a\u12345x" +select * from test_regex('a\U000012345x', E'a\u12345x', 'P'); + test_regex +------------------- + {0,REG_UNONPOSIX} + {aሴ5x} +(2 rows) + +-- expectMatch 13.33 P "a\\U1000000x" "a\ufffd0x" "a\ufffd0x" +-- Tcl allows this as a standalone character, but Postgres doesn't +select * from test_regex('a\U1000000x', E'a\ufffd0x', 'P'); +ERROR: invalid regular expression: invalid escape \ sequence +-- expectMatch 13.34 P {a\U1000000x} "a\ufffd0x" "a\ufffd0x" +-- Tcl allows this as a standalone character, but Postgres doesn't +select * from test_regex('a\U1000000x', E'a\ufffd0x', 'P'); +ERROR: invalid regular expression: invalid escape \ sequence +-- Additional tests, not derived from Tcl +-- Exercise logic around high character ranges a bit more +select * from test_regex('a + [\u1000-\u1100]* + [\u3000-\u3100]* + [\u1234-\u25ff]+ + [\u2000-\u35ff]* + [\u2600-\u2f00]* + \u1236\u1236x', + E'a\u1234\u1236\u1236x', 'xEMP'); + test_regex +---------------------------------------- + {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT} + {aሴሶሶx} +(2 rows) + +select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237', + E'\u1500\u1237', 'ELMP'); + test_regex +---------------------------------------------------- + {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT,REG_ULOCALE} + {ᔀሷ} +(2 rows) + +select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237', + E'A\u1239', 'ELMP'); + test_regex +---------------------------------------------------- + {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT,REG_ULOCALE} +(1 row) + diff --git a/src/test/modules/test_regex/expected/test_regex_utf8_1.out b/src/test/modules/test_regex/expected/test_regex_utf8_1.out new file mode 100644 index 0000000000..37aead89c0 --- /dev/null +++ 
b/src/test/modules/test_regex/expected/test_regex_utf8_1.out @@ -0,0 +1,8 @@ +/* + * This test must be run in a database with UTF-8 encoding, + * because other encodings don't support all the characters used. + */ +SELECT getdatabaseencoding() <> 'UTF8' + AS skip_test \gset +\if :skip_test +\quit diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql new file mode 100644 index 0000000000..272dfc0cd6 --- /dev/null +++ b/src/test/modules/test_regex/sql/test_regex.sql @@ -0,0 +1,1667 @@ +-- This file is based on tests/reg.test from the Tcl distribution, +-- which is marked +-- # Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. +-- The full copyright notice can be found in src/backend/regex/COPYRIGHT. +-- Most commented lines below are copied from reg.test. Each +-- test case is followed by an equivalent test using test_regex(). + +create extension test_regex; + +set standard_conforming_strings = on; + +-- # support functions and preliminary misc. +-- # This is sensitive to changes in message wording, but we really have to +-- # test the code->message expansion at least once. 
+-- ::tcltest::test reg-0.1 "regexp error reporting" { +-- list [catch {regexp (*) ign} msg] $msg +-- } {1 {couldn't compile regular expression pattern: quantifier operand invalid}} +select * from test_regex('(*)', '', ''); + +-- doing 1 "basic sanity checks" + +-- expectMatch 1.1 & abc abc abc +select * from test_regex('abc', 'abc', ''); +select * from test_regex('abc', 'abc', 'b'); +-- expectNomatch 1.2 & abc def +select * from test_regex('abc', 'def', ''); +select * from test_regex('abc', 'def', 'b'); +-- expectMatch 1.3 & abc xyabxabce abc +select * from test_regex('abc', 'xyabxabce', ''); +select * from test_regex('abc', 'xyabxabce', 'b'); + +-- doing 2 "invalid option combinations" + +-- expectError 2.1 qe a INVARG +select * from test_regex('a', '', 'qe'); +-- expectError 2.2 qa a INVARG +select * from test_regex('a', '', 'qa'); +-- expectError 2.3 qx a INVARG +select * from test_regex('a', '', 'qx'); +-- expectError 2.4 qn a INVARG +select * from test_regex('a', '', 'qn'); +-- expectError 2.5 ba a INVARG +select * from test_regex('a', '', 'ba'); + +-- doing 3 "basic syntax" + +-- expectIndices 3.1 &NS "" a {0 -1} +select * from test_regex('', 'a', '0NS'); +select * from test_regex('', 'a', '0NSb'); +-- expectMatch 3.2 NS a| a a +select * from test_regex('a|', 'a', 'NS'); +-- expectMatch 3.3 - a|b a a +select * from test_regex('a|b', 'a', '-'); +-- expectMatch 3.4 - a|b b b +select * from test_regex('a|b', 'b', '-'); +-- expectMatch 3.5 NS a||b b b +select * from test_regex('a||b', 'b', 'NS'); +-- expectMatch 3.6 & ab ab ab +select * from test_regex('ab', 'ab', ''); +select * from test_regex('ab', 'ab', 'b'); + +-- doing 4 "parentheses" + +-- expectMatch 4.1 - (a)e ae ae a +select * from test_regex('(a)e', 'ae', '-'); +-- expectMatch 4.2 o (a)e ae +select * from test_regex('(a)e', 'ae', 'o'); +-- expectMatch 4.3 b {\(a\)b} ab ab a +select * from test_regex('\(a\)b', 'ab', 'b'); +-- expectMatch 4.4 - a((b)c) abc abc bc b +select * from test_regex('a((b)c)', 
'abc', '-'); +-- expectMatch 4.5 - a(b)(c) abc abc b c +select * from test_regex('a(b)(c)', 'abc', '-'); +-- expectError 4.6 - a(b EPAREN +select * from test_regex('a(b', '', '-'); +-- expectError 4.7 b {a\(b} EPAREN +select * from test_regex('a\(b', '', 'b'); +-- # sigh, we blew it on the specs here... someday this will be fixed in POSIX, +-- # but meanwhile, it's fixed in AREs +-- expectMatch 4.8 eU a)b a)b a)b +select * from test_regex('a)b', 'a)b', 'eU'); +-- expectError 4.9 - a)b EPAREN +select * from test_regex('a)b', '', '-'); +-- expectError 4.10 b {a\)b} EPAREN +select * from test_regex('a\)b', '', 'b'); +-- expectMatch 4.11 P a(?:b)c abc abc +select * from test_regex('a(?:b)c', 'abc', 'P'); +-- expectError 4.12 e a(?:b)c BADRPT +select * from test_regex('a(?:b)c', '', 'e'); +-- expectIndices 4.13 S a()b ab {0 1} {1 0} +select * from test_regex('a()b', 'ab', '0S'); +-- expectMatch 4.14 SP a(?:)b ab ab +select * from test_regex('a(?:)b', 'ab', 'SP'); +-- expectIndices 4.15 S a(|b)c ac {0 1} {1 0} +select * from test_regex('a(|b)c', 'ac', '0S'); +-- expectMatch 4.16 S a(b|)c abc abc b +select * from test_regex('a(b|)c', 'abc', 'S'); + +-- doing 5 "simple one-char matching" +-- # general case of brackets done later + +-- expectMatch 5.1 & a.b axb axb +select * from test_regex('a.b', 'axb', ''); +select * from test_regex('a.b', 'axb', 'b'); +-- expectNomatch 5.2 &n "a.b" "a\nb" +select * from test_regex('a.b', E'a\nb', 'n'); +select * from test_regex('a.b', E'a\nb', 'nb'); +-- expectMatch 5.3 & {a[bc]d} abd abd +select * from test_regex('a[bc]d', 'abd', ''); +select * from test_regex('a[bc]d', 'abd', 'b'); +-- expectMatch 5.4 & {a[bc]d} acd acd +select * from test_regex('a[bc]d', 'acd', ''); +select * from test_regex('a[bc]d', 'acd', 'b'); +-- expectNomatch 5.5 & {a[bc]d} aed +select * from test_regex('a[bc]d', 'aed', ''); +select * from test_regex('a[bc]d', 'aed', 'b'); +-- expectNomatch 5.6 & {a[^bc]d} abd +select * from test_regex('a[^bc]d', 'abd', ''); 
+select * from test_regex('a[^bc]d', 'abd', 'b'); +-- expectMatch 5.7 & {a[^bc]d} aed aed +select * from test_regex('a[^bc]d', 'aed', ''); +select * from test_regex('a[^bc]d', 'aed', 'b'); +-- expectNomatch 5.8 &p "a\[^bc]d" "a\nd" +select * from test_regex('a[^bc]d', E'a\nd', 'p'); +select * from test_regex('a[^bc]d', E'a\nd', 'pb'); + +-- doing 6 "context-dependent syntax" +-- # plus odds and ends + +-- expectError 6.1 - * BADRPT +select * from test_regex('*', '', '-'); +-- expectMatch 6.2 b * * * +select * from test_regex('*', '*', 'b'); +-- expectMatch 6.3 b {\(*\)} * * * +select * from test_regex('\(*\)', '*', 'b'); +-- expectError 6.4 - (*) BADRPT +select * from test_regex('(*)', '', '-'); +-- expectMatch 6.5 b ^* * * +select * from test_regex('^*', '*', 'b'); +-- expectError 6.6 - ^* BADRPT +select * from test_regex('^*', '', '-'); +-- expectNomatch 6.7 & ^b ^b +select * from test_regex('^b', '^b', ''); +select * from test_regex('^b', '^b', 'b'); +-- expectMatch 6.8 b x^ x^ x^ +select * from test_regex('x^', 'x^', 'b'); +-- expectNomatch 6.9 I x^ x +select * from test_regex('x^', 'x', 'I'); +-- expectMatch 6.10 n "\n^" "x\nb" "\n" +select * from test_regex(E'\n^', E'x\nb', 'n'); +-- expectNomatch 6.11 bS {\(^b\)} ^b +select * from test_regex('\(^b\)', '^b', 'bS'); +-- expectMatch 6.12 - (^b) b b b +select * from test_regex('(^b)', 'b', '-'); +-- expectMatch 6.13 & {x$} x x +select * from test_regex('x$', 'x', ''); +select * from test_regex('x$', 'x', 'b'); +-- expectMatch 6.14 bS {\(x$\)} x x x +select * from test_regex('\(x$\)', 'x', 'bS'); +-- expectMatch 6.15 - {(x$)} x x x +select * from test_regex('(x$)', 'x', '-'); +-- expectMatch 6.16 b {x$y} "x\$y" "x\$y" +select * from test_regex('x$y', 'x$y', 'b'); +-- expectNomatch 6.17 I {x$y} xy +select * from test_regex('x$y', 'xy', 'I'); +-- expectMatch 6.18 n "x\$\n" "x\n" "x\n" +select * from test_regex(E'x$\n', E'x\n', 'n'); +-- expectError 6.19 - + BADRPT +select * from test_regex('+', '', '-'); +-- 
expectError 6.20 - ? BADRPT +select * from test_regex('?', '', '-'); + +-- These two are not yet incorporated in Tcl, cf +-- https://core.tcl-lang.org/tcl/artifact/106269fa65d96b83 +-- expectError 6.21 - {x(\w)(?=(\1))} ESUBREG +select * from test_regex('x(\w)(?=(\1))', '', '-'); +-- expectMatch 6.22 HP {x(?=((foo)))} xfoo x +select * from test_regex('x(?=((foo)))', 'xfoo', 'HP'); + +-- doing 7 "simple quantifiers" + +-- expectMatch 7.1 &N a* aa aa +select * from test_regex('a*', 'aa', 'N'); +select * from test_regex('a*', 'aa', 'Nb'); +-- expectIndices 7.2 &N a* b {0 -1} +select * from test_regex('a*', 'b', '0N'); +select * from test_regex('a*', 'b', '0Nb'); +-- expectMatch 7.3 - a+ aa aa +select * from test_regex('a+', 'aa', '-'); +-- expectMatch 7.4 - a?b ab ab +select * from test_regex('a?b', 'ab', '-'); +-- expectMatch 7.5 - a?b b b +select * from test_regex('a?b', 'b', '-'); +-- expectError 7.6 - ** BADRPT +select * from test_regex('**', '', '-'); +-- expectMatch 7.7 bN ** *** *** +select * from test_regex('**', '***', 'bN'); +-- expectError 7.8 & a** BADRPT +select * from test_regex('a**', '', ''); +select * from test_regex('a**', '', 'b'); +-- expectError 7.9 & a**b BADRPT +select * from test_regex('a**b', '', ''); +select * from test_regex('a**b', '', 'b'); +-- expectError 7.10 & *** BADRPT +select * from test_regex('***', '', ''); +select * from test_regex('***', '', 'b'); +-- expectError 7.11 - a++ BADRPT +select * from test_regex('a++', '', '-'); +-- expectError 7.12 - a?+ BADRPT +select * from test_regex('a?+', '', '-'); +-- expectError 7.13 - a?* BADRPT +select * from test_regex('a?*', '', '-'); +-- expectError 7.14 - a+* BADRPT +select * from test_regex('a+*', '', '-'); +-- expectError 7.15 - a*+ BADRPT +select * from test_regex('a*+', '', '-'); + +-- doing 8 "braces" + +-- expectMatch 8.1 NQ "a{0,1}" "" "" +select * from test_regex('a{0,1}', '', 'NQ'); +-- expectMatch 8.2 NQ "a{0,1}" ac a +select * from test_regex('a{0,1}', 'ac', 'NQ'); +-- 
expectError 8.3 - "a{1,0}" BADBR +select * from test_regex('a{1,0}', '', '-'); +-- expectError 8.4 - "a{1,2,3}" BADBR +select * from test_regex('a{1,2,3}', '', '-'); +-- expectError 8.5 - "a{257}" BADBR +select * from test_regex('a{257}', '', '-'); +-- expectError 8.6 - "a{1000}" BADBR +select * from test_regex('a{1000}', '', '-'); +-- expectError 8.7 - "a{1" EBRACE +select * from test_regex('a{1', '', '-'); +-- expectError 8.8 - "a{1n}" BADBR +select * from test_regex('a{1n}', '', '-'); +-- expectMatch 8.9 BS "a{b" "a\{b" "a\{b" +select * from test_regex('a{b', 'a{b', 'BS'); +-- expectMatch 8.10 BS "a{" "a\{" "a\{" +select * from test_regex('a{', 'a{', 'BS'); +-- expectMatch 8.11 bQ "a\\{0,1\\}b" cb b +select * from test_regex('a\{0,1\}b', 'cb', 'bQ'); +-- expectError 8.12 b "a\\{0,1" EBRACE +select * from test_regex('a\{0,1', '', 'b'); +-- expectError 8.13 - "a{0,1\\" BADBR +select * from test_regex('a{0,1\', '', '-'); +-- expectMatch 8.14 Q "a{0}b" ab b +select * from test_regex('a{0}b', 'ab', 'Q'); +-- expectMatch 8.15 Q "a{0,0}b" ab b +select * from test_regex('a{0,0}b', 'ab', 'Q'); +-- expectMatch 8.16 Q "a{0,1}b" ab ab +select * from test_regex('a{0,1}b', 'ab', 'Q'); +-- expectMatch 8.17 Q "a{0,2}b" b b +select * from test_regex('a{0,2}b', 'b', 'Q'); +-- expectMatch 8.18 Q "a{0,2}b" aab aab +select * from test_regex('a{0,2}b', 'aab', 'Q'); +-- expectMatch 8.19 Q "a{0,}b" aab aab +select * from test_regex('a{0,}b', 'aab', 'Q'); +-- expectMatch 8.20 Q "a{1,1}b" aab ab +select * from test_regex('a{1,1}b', 'aab', 'Q'); +-- expectMatch 8.21 Q "a{1,3}b" aaaab aaab +select * from test_regex('a{1,3}b', 'aaaab', 'Q'); +-- expectNomatch 8.22 Q "a{1,3}b" b +select * from test_regex('a{1,3}b', 'b', 'Q'); +-- expectMatch 8.23 Q "a{1,}b" aab aab +select * from test_regex('a{1,}b', 'aab', 'Q'); +-- expectNomatch 8.24 Q "a{2,3}b" ab +select * from test_regex('a{2,3}b', 'ab', 'Q'); +-- expectMatch 8.25 Q "a{2,3}b" aaaab aaab +select * from test_regex('a{2,3}b', 'aaaab', 
'Q'); +-- expectNomatch 8.26 Q "a{2,}b" ab +select * from test_regex('a{2,}b', 'ab', 'Q'); +-- expectMatch 8.27 Q "a{2,}b" aaaab aaaab +select * from test_regex('a{2,}b', 'aaaab', 'Q'); + +-- doing 9 "brackets" + +-- expectMatch 9.1 & {a[bc]} ac ac +select * from test_regex('a[bc]', 'ac', ''); +select * from test_regex('a[bc]', 'ac', 'b'); +-- expectMatch 9.2 & {a[-]} a- a- +select * from test_regex('a[-]', 'a-', ''); +select * from test_regex('a[-]', 'a-', 'b'); +-- expectMatch 9.3 & {a[[.-.]]} a- a- +select * from test_regex('a[[.-.]]', 'a-', ''); +select * from test_regex('a[[.-.]]', 'a-', 'b'); +-- expectMatch 9.4 &L {a[[.zero.]]} a0 a0 +select * from test_regex('a[[.zero.]]', 'a0', 'L'); +select * from test_regex('a[[.zero.]]', 'a0', 'Lb'); +-- expectMatch 9.5 &LM {a[[.zero.]-9]} a2 a2 +select * from test_regex('a[[.zero.]-9]', 'a2', 'LM'); +select * from test_regex('a[[.zero.]-9]', 'a2', 'LMb'); +-- expectMatch 9.6 &M {a[0-[.9.]]} a2 a2 +select * from test_regex('a[0-[.9.]]', 'a2', 'M'); +select * from test_regex('a[0-[.9.]]', 'a2', 'Mb'); +-- expectMatch 9.7 &+L {a[[=x=]]} ax ax +select * from test_regex('a[[=x=]]', 'ax', '+L'); +select * from test_regex('a[[=x=]]', 'ax', '+Lb'); +-- expectMatch 9.8 &+L {a[[=x=]]} ay ay +select * from test_regex('a[[=x=]]', 'ay', '+L'); +select * from test_regex('a[[=x=]]', 'ay', '+Lb'); +-- expectNomatch 9.9 &+L {a[[=x=]]} az +select * from test_regex('a[[=x=]]', 'az', '+L'); +select * from test_regex('a[[=x=]]', 'az', '+Lb'); +-- expectError 9.10 & {a[0-[=x=]]} ERANGE +select * from test_regex('a[0-[=x=]]', '', ''); +select * from test_regex('a[0-[=x=]]', '', 'b'); +-- expectMatch 9.11 &L {a[[:digit:]]} a0 a0 +select * from test_regex('a[[:digit:]]', 'a0', 'L'); +select * from test_regex('a[[:digit:]]', 'a0', 'Lb'); +-- expectError 9.12 & {a[[:woopsie:]]} ECTYPE +select * from test_regex('a[[:woopsie:]]', '', ''); +select * from test_regex('a[[:woopsie:]]', '', 'b'); +-- expectNomatch 9.13 &L {a[[:digit:]]} ab +select * 
from test_regex('a[[:digit:]]', 'ab', 'L'); +select * from test_regex('a[[:digit:]]', 'ab', 'Lb'); +-- expectError 9.14 & {a[0-[:digit:]]} ERANGE +select * from test_regex('a[0-[:digit:]]', '', ''); +select * from test_regex('a[0-[:digit:]]', '', 'b'); +-- expectMatch 9.15 &LP {[[:<:]]a} a a +select * from test_regex('[[:<:]]a', 'a', 'LP'); +select * from test_regex('[[:<:]]a', 'a', 'LPb'); +-- expectMatch 9.16 &LP {a[[:>:]]} a a +select * from test_regex('a[[:>:]]', 'a', 'LP'); +select * from test_regex('a[[:>:]]', 'a', 'LPb'); +-- expectError 9.17 & {a[[..]]b} ECOLLATE +select * from test_regex('a[[..]]b', '', ''); +select * from test_regex('a[[..]]b', '', 'b'); +-- expectError 9.18 & {a[[==]]b} ECOLLATE +select * from test_regex('a[[==]]b', '', ''); +select * from test_regex('a[[==]]b', '', 'b'); +-- expectError 9.19 & {a[[::]]b} ECTYPE +select * from test_regex('a[[::]]b', '', ''); +select * from test_regex('a[[::]]b', '', 'b'); +-- expectError 9.20 & {a[[.a} EBRACK +select * from test_regex('a[[.a', '', ''); +select * from test_regex('a[[.a', '', 'b'); +-- expectError 9.21 & {a[[=a} EBRACK +select * from test_regex('a[[=a', '', ''); +select * from test_regex('a[[=a', '', 'b'); +-- expectError 9.22 & {a[[:a} EBRACK +select * from test_regex('a[[:a', '', ''); +select * from test_regex('a[[:a', '', 'b'); +-- expectError 9.23 & {a[} EBRACK +select * from test_regex('a[', '', ''); +select * from test_regex('a[', '', 'b'); +-- expectError 9.24 & {a[b} EBRACK +select * from test_regex('a[b', '', ''); +select * from test_regex('a[b', '', 'b'); +-- expectError 9.25 & {a[b-} EBRACK +select * from test_regex('a[b-', '', ''); +select * from test_regex('a[b-', '', 'b'); +-- expectError 9.26 & {a[b-c} EBRACK +select * from test_regex('a[b-c', '', ''); +select * from test_regex('a[b-c', '', 'b'); +-- expectMatch 9.27 &M {a[b-c]} ab ab +select * from test_regex('a[b-c]', 'ab', 'M'); +select * from test_regex('a[b-c]', 'ab', 'Mb'); +-- expectMatch 9.28 & {a[b-b]} ab ab +select 
* from test_regex('a[b-b]', 'ab', ''); +select * from test_regex('a[b-b]', 'ab', 'b'); +-- expectMatch 9.29 &M {a[1-2]} a2 a2 +select * from test_regex('a[1-2]', 'a2', 'M'); +select * from test_regex('a[1-2]', 'a2', 'Mb'); +-- expectError 9.30 & {a[c-b]} ERANGE +select * from test_regex('a[c-b]', '', ''); +select * from test_regex('a[c-b]', '', 'b'); +-- expectError 9.31 & {a[a-b-c]} ERANGE +select * from test_regex('a[a-b-c]', '', ''); +select * from test_regex('a[a-b-c]', '', 'b'); +-- expectMatch 9.32 &M {a[--?]b} a?b a?b +select * from test_regex('a[--?]b', 'a?b', 'M'); +select * from test_regex('a[--?]b', 'a?b', 'Mb'); +-- expectMatch 9.33 & {a[---]b} a-b a-b +select * from test_regex('a[---]b', 'a-b', ''); +select * from test_regex('a[---]b', 'a-b', 'b'); +-- expectMatch 9.34 & {a[]b]c} a]c a]c +select * from test_regex('a[]b]c', 'a]c', ''); +select * from test_regex('a[]b]c', 'a]c', 'b'); +-- expectMatch 9.35 EP {a[\]]b} a]b a]b +select * from test_regex('a[\]]b', 'a]b', 'EP'); +-- expectNomatch 9.36 bE {a[\]]b} a]b +select * from test_regex('a[\]]b', 'a]b', 'bE'); +-- expectMatch 9.37 bE {a[\]]b} "a\\]b" "a\\]b" +select * from test_regex('a[\]]b', 'a\]b', 'bE'); +-- expectMatch 9.38 eE {a[\]]b} "a\\]b" "a\\]b" +select * from test_regex('a[\]]b', 'a\]b', 'eE'); +-- expectMatch 9.39 EP {a[\\]b} "a\\b" "a\\b" +select * from test_regex('a[\\]b', 'a\b', 'EP'); +-- expectMatch 9.40 eE {a[\\]b} "a\\b" "a\\b" +select * from test_regex('a[\\]b', 'a\b', 'eE'); +-- expectMatch 9.41 bE {a[\\]b} "a\\b" "a\\b" +select * from test_regex('a[\\]b', 'a\b', 'bE'); +-- expectError 9.42 - {a[\Z]b} EESCAPE +select * from test_regex('a[\Z]b', '', '-'); +-- expectMatch 9.43 & {a[[b]c} "a\[c" "a\[c" +select * from test_regex('a[[b]c', 'a[c', ''); +select * from test_regex('a[[b]c', 'a[c', 'b'); +-- This only works in UTF8 encoding, so it's moved to test_regex_utf8.sql: +-- expectMatch 9.44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \ +-- "a\u0102\u02ffb" "a\u0102\u02ffb" + +-- doing 
10 "anchors and newlines" + +-- expectMatch 10.1 & ^a a a +select * from test_regex('^a', 'a', ''); +select * from test_regex('^a', 'a', 'b'); +-- expectNomatch 10.2 &^ ^a a +select * from test_regex('^a', 'a', '^'); +select * from test_regex('^a', 'a', '^b'); +-- expectIndices 10.3 &N ^ a {0 -1} +select * from test_regex('^', 'a', '0N'); +select * from test_regex('^', 'a', '0Nb'); +-- expectIndices 10.4 & {a$} aba {2 2} +select * from test_regex('a$', 'aba', '0'); +select * from test_regex('a$', 'aba', '0b'); +-- expectNomatch 10.5 {&$} {a$} a +select * from test_regex('a$', 'a', '$'); +select * from test_regex('a$', 'a', '$b'); +-- expectIndices 10.6 &N {$} ab {2 1} +select * from test_regex('$', 'ab', '0N'); +select * from test_regex('$', 'ab', '0Nb'); +-- expectMatch 10.7 &n ^a a a +select * from test_regex('^a', 'a', 'n'); +select * from test_regex('^a', 'a', 'nb'); +-- expectMatch 10.8 &n "^a" "b\na" "a" +select * from test_regex('^a', E'b\na', 'n'); +select * from test_regex('^a', E'b\na', 'nb'); +-- expectIndices 10.9 &w "^a" "a\na" {0 0} +select * from test_regex('^a', E'a\na', '0w'); +select * from test_regex('^a', E'a\na', '0wb'); +-- expectIndices 10.10 &n^ "^a" "a\na" {2 2} +select * from test_regex('^a', E'a\na', '0n^'); +select * from test_regex('^a', E'a\na', '0n^b'); +-- expectMatch 10.11 &n {a$} a a +select * from test_regex('a$', 'a', 'n'); +select * from test_regex('a$', 'a', 'nb'); +-- expectMatch 10.12 &n "a\$" "a\nb" "a" +select * from test_regex('a$', E'a\nb', 'n'); +select * from test_regex('a$', E'a\nb', 'nb'); +-- expectIndices 10.13 &n "a\$" "a\na" {0 0} +select * from test_regex('a$', E'a\na', '0n'); +select * from test_regex('a$', E'a\na', '0nb'); +-- expectIndices 10.14 N ^^ a {0 -1} +select * from test_regex('^^', 'a', '0N'); +-- expectMatch 10.15 b ^^ ^ ^ +select * from test_regex('^^', '^', 'b'); +-- expectIndices 10.16 N {$$} a {1 0} +select * from test_regex('$$', 'a', '0N'); +-- expectMatch 10.17 b {$$} "\$" "\$" +select * from 
test_regex('$$', '$', 'b'); +-- expectMatch 10.18 &N {^$} "" "" +select * from test_regex('^$', '', 'N'); +select * from test_regex('^$', '', 'Nb'); +-- expectNomatch 10.19 &N {^$} a +select * from test_regex('^$', 'a', 'N'); +select * from test_regex('^$', 'a', 'Nb'); +-- expectIndices 10.20 &nN "^\$" a\n\nb {2 1} +select * from test_regex('^$', E'a\n\nb', '0nN'); +select * from test_regex('^$', E'a\n\nb', '0nNb'); +-- expectMatch 10.21 N {$^} "" "" +select * from test_regex('$^', '', 'N'); +-- expectMatch 10.22 b {$^} "\$^" "\$^" +select * from test_regex('$^', '$^', 'b'); +-- expectMatch 10.23 P {\Aa} a a +select * from test_regex('\Aa', 'a', 'P'); +-- expectMatch 10.24 ^P {\Aa} a a +select * from test_regex('\Aa', 'a', '^P'); +-- expectNomatch 10.25 ^nP {\Aa} "b\na" +select * from test_regex('\Aa', E'b\na', '^nP'); +-- expectMatch 10.26 P {a\Z} a a +select * from test_regex('a\Z', 'a', 'P'); +-- expectMatch 10.27 \$P {a\Z} a a +select * from test_regex('a\Z', 'a', '$P'); +-- expectNomatch 10.28 \$nP {a\Z} "a\nb" +select * from test_regex('a\Z', E'a\nb', '$nP'); +-- expectError 10.29 - ^* BADRPT +select * from test_regex('^*', '', '-'); +-- expectError 10.30 - {$*} BADRPT +select * from test_regex('$*', '', '-'); +-- expectError 10.31 - {\A*} BADRPT +select * from test_regex('\A*', '', '-'); +-- expectError 10.32 - {\Z*} BADRPT +select * from test_regex('\Z*', '', '-'); + +-- doing 11 "boundary constraints" + +-- expectMatch 11.1 &LP {[[:<:]]a} a a +select * from test_regex('[[:<:]]a', 'a', 'LP'); +select * from test_regex('[[:<:]]a', 'a', 'LPb'); +-- expectMatch 11.2 &LP {[[:<:]]a} -a a +select * from test_regex('[[:<:]]a', '-a', 'LP'); +select * from test_regex('[[:<:]]a', '-a', 'LPb'); +-- expectNomatch 11.3 &LP {[[:<:]]a} ba +select * from test_regex('[[:<:]]a', 'ba', 'LP'); +select * from test_regex('[[:<:]]a', 'ba', 'LPb'); +-- expectMatch 11.4 &LP {a[[:>:]]} a a +select * from test_regex('a[[:>:]]', 'a', 'LP'); +select * from test_regex('a[[:>:]]', 'a', 
'LPb'); +-- expectMatch 11.5 &LP {a[[:>:]]} a- a +select * from test_regex('a[[:>:]]', 'a-', 'LP'); +select * from test_regex('a[[:>:]]', 'a-', 'LPb'); +-- expectNomatch 11.6 &LP {a[[:>:]]} ab +select * from test_regex('a[[:>:]]', 'ab', 'LP'); +select * from test_regex('a[[:>:]]', 'ab', 'LPb'); +-- expectMatch 11.7 bLP {\<a} a a +select * from test_regex('\<a', 'a', 'bLP'); +-- expectNomatch 11.8 bLP {\<a} ba +select * from test_regex('\<a', 'ba', 'bLP'); +-- expectMatch 11.9 bLP {a\>} a a +select * from test_regex('a\>', 'a', 'bLP'); +-- expectNomatch 11.10 bLP {a\>} ab +select * from test_regex('a\>', 'ab', 'bLP'); +-- expectMatch 11.11 LP {\ya} a a +select * from test_regex('\ya', 'a', 'LP'); +-- expectNomatch 11.12 LP {\ya} ba +select * from test_regex('\ya', 'ba', 'LP'); +-- expectMatch 11.13 LP {a\y} a a +select * from test_regex('a\y', 'a', 'LP'); +-- expectNomatch 11.14 LP {a\y} ab +select * from test_regex('a\y', 'ab', 'LP'); +-- expectMatch 11.15 LP {a\Y} ab a +select * from test_regex('a\Y', 'ab', 'LP'); +-- expectNomatch 11.16 LP {a\Y} a- +select * from test_regex('a\Y', 'a-', 'LP'); +-- expectNomatch 11.17 LP {a\Y} a +select * from test_regex('a\Y', 'a', 'LP'); +-- expectNomatch 11.18 LP {-\Y} -a +select * from test_regex('-\Y', '-a', 'LP'); +-- expectMatch 11.19 LP {-\Y} -% - +select * from test_regex('-\Y', '-%', 'LP'); +-- expectNomatch 11.20 LP {\Y-} a- +select * from test_regex('\Y-', 'a-', 'LP'); +-- expectError 11.21 - {[[:<:]]*} BADRPT +select * from test_regex('[[:<:]]*', '', '-'); +-- expectError 11.22 - {[[:>:]]*} BADRPT +select * from test_regex('[[:>:]]*', '', '-'); +-- expectError 11.23 b {\<*} BADRPT +select * from test_regex('\<*', '', 'b'); +-- expectError 11.24 b {\>*} BADRPT +select * from test_regex('\>*', '', 'b'); +-- expectError 11.25 - {\y*} BADRPT +select * from test_regex('\y*', '', '-'); +-- expectError 11.26 - {\Y*} BADRPT +select * from test_regex('\Y*', '', '-'); +-- expectMatch 11.27 LP {\ma} a a +select * from test_regex('\ma', 'a', 'LP'); +-- expectNomatch 11.28 LP {\ma} ba +select * from test_regex('\ma', 'ba', 'LP'); +-- expectMatch 11.29 LP {a\M} a a +select * from 
test_regex('a\M', 'a', 'LP'); +-- expectNomatch 11.30 LP {a\M} ab +select * from test_regex('a\M', 'ab', 'LP'); +-- expectNomatch 11.31 ILP {\Ma} a +select * from test_regex('\Ma', 'a', 'ILP'); +-- expectNomatch 11.32 ILP {a\m} a +select * from test_regex('a\m', 'a', 'ILP'); + +-- doing 12 "character classes" + +-- expectMatch 12.1 LP {a\db} a0b a0b +select * from test_regex('a\db', 'a0b', 'LP'); +-- expectNomatch 12.2 LP {a\db} axb +select * from test_regex('a\db', 'axb', 'LP'); +-- expectNomatch 12.3 LP {a\Db} a0b +select * from test_regex('a\Db', 'a0b', 'LP'); +-- expectMatch 12.4 LP {a\Db} axb axb +select * from test_regex('a\Db', 'axb', 'LP'); +-- expectMatch 12.5 LP "a\\sb" "a b" "a b" +select * from test_regex('a\sb', 'a b', 'LP'); +-- expectMatch 12.6 LP "a\\sb" "a\tb" "a\tb" +select * from test_regex('a\sb', E'a\tb', 'LP'); +-- expectMatch 12.7 LP "a\\sb" "a\nb" "a\nb" +select * from test_regex('a\sb', E'a\nb', 'LP'); +-- expectNomatch 12.8 LP {a\sb} axb +select * from test_regex('a\sb', 'axb', 'LP'); +-- expectMatch 12.9 LP {a\Sb} axb axb +select * from test_regex('a\Sb', 'axb', 'LP'); +-- expectNomatch 12.10 LP "a\\Sb" "a b" +select * from test_regex('a\Sb', 'a b', 'LP'); +-- expectMatch 12.11 LP {a\wb} axb axb +select * from test_regex('a\wb', 'axb', 'LP'); +-- expectNomatch 12.12 LP {a\wb} a-b +select * from test_regex('a\wb', 'a-b', 'LP'); +-- expectNomatch 12.13 LP {a\Wb} axb +select * from test_regex('a\Wb', 'axb', 'LP'); +-- expectMatch 12.14 LP {a\Wb} a-b a-b +select * from test_regex('a\Wb', 'a-b', 'LP'); +-- expectMatch 12.15 LP {\y\w+z\y} adze-guz guz +select * from test_regex('\y\w+z\y', 'adze-guz', 'LP'); +-- expectMatch 12.16 LPE {a[\d]b} a1b a1b +select * from test_regex('a[\d]b', 'a1b', 'LPE'); +-- expectMatch 12.17 LPE "a\[\\s]b" "a b" "a b" +select * from test_regex('a[\s]b', 'a b', 'LPE'); +-- expectMatch 12.18 LPE {a[\w]b} axb axb +select * from test_regex('a[\w]b', 'axb', 'LPE'); + +-- doing 13 "escapes" + +-- expectError 13.1 & "a\\" 
EESCAPE +select * from test_regex('a\', '', ''); +select * from test_regex('a\', '', 'b'); +-- expectMatch 13.2 - {a\]+)>} a +-- } 1 +select * from test_regex('\A\s*[^<]*\s*<([^>]+)>', 'a', 'LP'); + +-- test reg-33.4 {Bug 505048} { +-- regexp {\A\s*([^b]*)b} ab +-- } 1 +select * from test_regex('\A\s*([^b]*)b', 'ab', 'LP'); + +-- test reg-33.5 {Bug 505048} { +-- regexp {\A\s*[^b]*(b)} ab +-- } 1 +select * from test_regex('\A\s*[^b]*(b)', 'ab', 'LP'); + +-- test reg-33.6 {Bug 505048} { +-- regexp {\A(\s*)[^b]*(b)} ab +-- } 1 +select * from test_regex('\A(\s*)[^b]*(b)', 'ab', 'LP'); + +-- test reg-33.7 {Bug 505048} { +-- regexp {\A\s*[^b]*b} ab +-- } 1 +select * from test_regex('\A\s*[^b]*b', 'ab', 'LP'); + +-- test reg-33.8 {Bug 505048} { +-- regexp -inline {\A\s*[^b]*b} ab +-- } ab +select * from test_regex('\A\s*[^b]*b', 'ab', 'LP'); + +-- test reg-33.9 {Bug 505048} { +-- regexp -indices -inline {\A\s*[^b]*b} ab +-- } {{0 1}} +select * from test_regex('\A\s*[^b]*b', 'ab', '0LP'); + +-- test reg-33.10 {Bug 840258} -body { +-- regsub {(^|\n)+\.*b} \n.b {} tmp +-- } -cleanup { +-- unset tmp +-- } -result 1 +select * from test_regex('(^|\n)+\.*b', E'\n.b', 'P'); + +-- test reg-33.11 {Bug 840258} -body { +-- regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \ +-- "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp +-- } -cleanup { +-- unset tmp +-- } -result 1 +select * from test_regex('(^|[\n\r]+)\.*\?<.*?(\n|\r)+', E'TQ\r\n.?<5000267>Test already stopped\r\n', 'EP'); + +-- test reg-33.12 {Bug 1810264 - bad read} { +-- regexp {\3161573148} {\3161573148} +-- } 0 +select * from test_regex('\3161573148', '\3161573148', 'MP'); + +-- test reg-33.13 {Bug 1810264 - infinite loop} { +-- regexp {($|^)*} {x} +-- } 1 +select * from test_regex('($|^)*', 'x', 'N'); + +-- # Some environments have small default stack sizes. 
[Bug 1905562] +-- test reg-33.14 {Bug 1810264 - super-expensive expression} nonPortable { +-- regexp {(x{200}){200}$y} {x} +-- } 0 +-- This might or might not work depending on platform, so skip it +-- select * from test_regex('(x{200}){200}$y', 'x', 'IQ'); + +-- test reg-33.15.1 {Bug 3603557 - an "in the wild" RE} { +-- lindex [regexp -expanded -about { +-- ^TETRA_MODE_CMD # Message Type +-- ([[:blank:]]+) # Pad +-- (ETS_1_1|ETS_1_2|ETS_2_2) # SystemCode +-- ([[:blank:]]+) # Pad +-- (CONTINUOUS|CARRIER|MCCH|TRAFFIC) # SharingMode +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,2}) # ColourCode +-- ([[:blank:]]+) # Pad +-- (1|2|3|4|6|9|12|18) # TSReservedFrames +-- ([[:blank:]]+) # Pad +-- (PASS|TRUE|FAIL|FALSE) # UPlaneDTX +-- ([[:blank:]]+) # Pad +-- (PASS|TRUE|FAIL|FALSE) # Frame18Extension +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,4}) # MCC +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,5}) # MNC +-- ([[:blank:]]+) # Pad +-- (BOTH|BCAST|ENQRY|NONE) # NbrCellBcast +-- ([[:blank:]]+) # Pad +-- (UNKNOWN|LOW|MEDIUM|HIGH) # CellServiceLevel +-- ([[:blank:]]+) # Pad +-- (PASS|TRUE|FAIL|FALSE) # LateEntryInfo +-- ([[:blank:]]+) # Pad +-- (300|400) # FrequencyBand +-- ([[:blank:]]+) # Pad +-- (NORMAL|REVERSE) # ReverseOperation +-- ([[:blank:]]+) # Pad +-- (NONE|\+6\.25|\-6\.25|\+12\.5) # Offset +-- ([[:blank:]]+) # Pad +-- (10) # DuplexSpacing +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,4}) # MainCarrierNr +-- ([[:blank:]]+) # Pad +-- (0|1|2|3) # NrCSCCH +-- ([[:blank:]]+) # Pad +-- (15|20|25|30|35|40|45) # MSTxPwrMax +-- ([[:blank:]]+) # Pad +-- (\-125|\-120|\-115|\-110|\-105|\-100|\-95|\-90|\-85|\-80|\-75|\-70|\-65|\-60|\-55|\-50) +-- # RxLevAccessMin +-- ([[:blank:]]+) # Pad +-- (\-53|\-51|\-49|\-47|\-45|\-43|\-41|\-39|\-37|\-35|\-33|\-31|\-29|\-27|\-25|\-23) +-- # AccessParameter +-- ([[:blank:]]+) # Pad +-- (DISABLE|[[:digit:]]{3,4}) # RadioDLTimeout +-- ([[:blank:]]+) # Pad +-- (\-[[:digit:]]{2,3}) # RSSIThreshold +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,5}) # 
CCKIdSCKVerNr +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,5}) # LocationArea +-- ([[:blank:]]+) # Pad +-- ([(1|0)]{16}) # SubscriberClass +-- ([[:blank:]]+) # Pad +-- ([(1|0)]{12}) # BSServiceDetails +-- ([[:blank:]]+) # Pad +-- (RANDOMIZE|IMMEDIATE|[[:digit:]]{1,2}) # IMM +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,2}) # WT +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,2}) # Nu +-- ([[:blank:]]+) # Pad +-- ([0-1]) # FrameLngFctr +-- ([[:blank:]]+) # Pad +-- ([[:digit:]]{1,2}) # TSPtr +-- ([[:blank:]]+) # Pad +-- ([0-7]) # MinPriority +-- ([[:blank:]]+) # Pad +-- (PASS|TRUE|FAIL|FALSE) # ExtdSrvcsEnabled +-- ([[:blank:]]+) # Pad +-- (.*) # ConditionalFields +-- }] 0 +-- } 68 +select * from test_regex($$ + ^TETRA_MODE_CMD # Message Type + ([[:blank:]]+) # Pad + (ETS_1_1|ETS_1_2|ETS_2_2) # SystemCode + ([[:blank:]]+) # Pad + (CONTINUOUS|CARRIER|MCCH|TRAFFIC) # SharingMode + ([[:blank:]]+) # Pad + ([[:digit:]]{1,2}) # ColourCode + ([[:blank:]]+) # Pad + (1|2|3|4|6|9|12|18) # TSReservedFrames + ([[:blank:]]+) # Pad + (PASS|TRUE|FAIL|FALSE) # UPlaneDTX + ([[:blank:]]+) # Pad + (PASS|TRUE|FAIL|FALSE) # Frame18Extension + ([[:blank:]]+) # Pad + ([[:digit:]]{1,4}) # MCC + ([[:blank:]]+) # Pad + ([[:digit:]]{1,5}) # MNC + ([[:blank:]]+) # Pad + (BOTH|BCAST|ENQRY|NONE) # NbrCellBcast + ([[:blank:]]+) # Pad + (UNKNOWN|LOW|MEDIUM|HIGH) # CellServiceLevel + ([[:blank:]]+) # Pad + (PASS|TRUE|FAIL|FALSE) # LateEntryInfo + ([[:blank:]]+) # Pad + (300|400) # FrequencyBand + ([[:blank:]]+) # Pad + (NORMAL|REVERSE) # ReverseOperation + ([[:blank:]]+) # Pad + (NONE|\+6\.25|\-6\.25|\+12\.5) # Offset + ([[:blank:]]+) # Pad + (10) # DuplexSpacing + ([[:blank:]]+) # Pad + ([[:digit:]]{1,4}) # MainCarrierNr + ([[:blank:]]+) # Pad + (0|1|2|3) # NrCSCCH + ([[:blank:]]+) # Pad + (15|20|25|30|35|40|45) # MSTxPwrMax + ([[:blank:]]+) # Pad + (\-125|\-120|\-115|\-110|\-105|\-100|\-95|\-90|\-85|\-80|\-75|\-70|\-65|\-60|\-55|\-50) + # RxLevAccessMin + ([[:blank:]]+) # Pad + 
(\-53|\-51|\-49|\-47|\-45|\-43|\-41|\-39|\-37|\-35|\-33|\-31|\-29|\-27|\-25|\-23) + # AccessParameter + ([[:blank:]]+) # Pad + (DISABLE|[[:digit:]]{3,4}) # RadioDLTimeout + ([[:blank:]]+) # Pad + (\-[[:digit:]]{2,3}) # RSSIThreshold + ([[:blank:]]+) # Pad + ([[:digit:]]{1,5}) # CCKIdSCKVerNr + ([[:blank:]]+) # Pad + ([[:digit:]]{1,5}) # LocationArea + ([[:blank:]]+) # Pad + ([(1|0)]{16}) # SubscriberClass + ([[:blank:]]+) # Pad + ([(1|0)]{12}) # BSServiceDetails + ([[:blank:]]+) # Pad + (RANDOMIZE|IMMEDIATE|[[:digit:]]{1,2}) # IMM + ([[:blank:]]+) # Pad + ([[:digit:]]{1,2}) # WT + ([[:blank:]]+) # Pad + ([[:digit:]]{1,2}) # Nu + ([[:blank:]]+) # Pad + ([0-1]) # FrameLngFctr + ([[:blank:]]+) # Pad + ([[:digit:]]{1,2}) # TSPtr + ([[:blank:]]+) # Pad + ([0-7]) # MinPriority + ([[:blank:]]+) # Pad + (PASS|TRUE|FAIL|FALSE) # ExtdSrvcsEnabled + ([[:blank:]]+) # Pad + (.*) # ConditionalFields + $$, '', 'xLMPQ'); + +-- test reg-33.16.1 {Bug [8d2c0da36d]- another "in the wild" RE} { +-- lindex [regexp -about "^MRK:client1: =1339 14HKelly Talisman 10011000 (\[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]*) \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 8 0 8 0 0 0 77 77 1 1 2 0 11 { 1 3 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 13HC6 My Creator 2 3 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 31HC7 Slightly offensive name, huh 3 8 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 23HE-mail:kelly@hotbox.com 4 9 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 17Hcompface must die 5 10 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 0 3HAir 6 12 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 14HPGP public key 7 13 8 \[0-9\]* \[0-9\]* 
\[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 16Hkelly@hotbox.com 8 30 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 0 12H2 text/plain 9 30 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 0 13H2 x-kom/basic 10 33 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 1H0 11 14 8 \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* \[0-9\]* 00000000 1 1H3 }\r?"] 0 +-- } 1 +select * from test_regex(E'^MRK:client1: =1339 14HKelly Talisman 10011000 ([0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]*) [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 8 0 8 0 0 0 77 77 1 1 2 0 11 { 1 3 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 13HC6 My Creator 2 3 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 31HC7 Slightly offensive name, huh 3 8 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 23HE-mail:kelly@hotbox.com 4 9 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 17Hcompface must die 5 10 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 0 3HAir 6 12 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 14HPGP public key 7 13 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 16Hkelly@hotbox.com 8 30 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 0 12H2 text/plain 9 30 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 0 13H2 x-kom/basic 10 33 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 1H0 11 14 8 [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* 00000000 1 1H3 }\r?', '', 'BMS'); + +-- test reg-33.15 {constraint fixes} { +-- regexp {(^)+^} x +-- } 1 +select * from test_regex('(^)+^', 'x', 'N'); + +-- 
test reg-33.16 {constraint fixes} { +-- regexp {($^)+} x +-- } 0 +select * from test_regex('($^)+', 'x', 'N'); + +-- test reg-33.17 {constraint fixes} { +-- regexp {(^$)*} x +-- } 1 +select * from test_regex('(^$)*', 'x', 'N'); + +-- test reg-33.18 {constraint fixes} { +-- regexp {(^(?!aa))+} {aa bb cc} +-- } 0 +select * from test_regex('(^(?!aa))+', 'aa bb cc', 'HP'); + +-- test reg-33.19 {constraint fixes} { +-- regexp {(^(?!aa)(?!bb)(?!cc))+} {aa x} +-- } 0 +select * from test_regex('(^(?!aa)(?!bb)(?!cc))+', 'aa x', 'HP'); + +-- test reg-33.20 {constraint fixes} { +-- regexp {(^(?!aa)(?!bb)(?!cc))+} {bb x} +-- } 0 +select * from test_regex('(^(?!aa)(?!bb)(?!cc))+', 'bb x', 'HP'); + +-- test reg-33.21 {constraint fixes} { +-- regexp {(^(?!aa)(?!bb)(?!cc))+} {cc x} +-- } 0 +select * from test_regex('(^(?!aa)(?!bb)(?!cc))+', 'cc x', 'HP'); + +-- test reg-33.22 {constraint fixes} { +-- regexp {(^(?!aa)(?!bb)(?!cc))+} {dd x} +-- } 1 +select * from test_regex('(^(?!aa)(?!bb)(?!cc))+', 'dd x', 'HP'); + +-- test reg-33.23 {} { +-- regexp {abcd(\m)+xyz} x +-- } 0 +select * from test_regex('abcd(\m)+xyz', 'x', 'ILP'); + +-- test reg-33.24 {} { +-- regexp {abcd(\m)+xyz} a +-- } 0 +select * from test_regex('abcd(\m)+xyz', 'a', 'ILP'); + +-- test reg-33.25 {} { +-- regexp {^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)} x +-- } 0 +select * from test_regex('^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)', 'x', 'S'); + +-- test reg-33.26 {} { +-- regexp {a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$} x +-- } 0 +select * from test_regex('a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$', 'x', 'IS'); + +-- test reg-33.27 {} { +-- regexp {xyz(\Y\Y)+} x +-- } 0 +select * from test_regex('xyz(\Y\Y)+', 'x', 'LP'); + +-- test reg-33.28 {} { +-- regexp {x|(?:\M)+} x +-- } 1 +select * from test_regex('x|(?:\M)+', 'x', 'LNP'); + +-- test reg-33.29 {} { +-- # This is near the limits of the RE engine +-- regexp [string repeat x*y*z* 480] x +-- } 1 +-- The runtime cost of this seems out of proportion to the 
value, +-- so for Postgres purposes reduce the repeat to 200x +select * from test_regex(repeat('x*y*z*', 200), 'x', 'N'); + +-- test reg-33.30 {Bug 1080042} { +-- regexp {(\Y)+} foo +-- } 1 +select * from test_regex('(\Y)+', 'foo', 'LNP'); diff --git a/src/test/modules/test_regex/sql/test_regex_utf8.sql b/src/test/modules/test_regex/sql/test_regex_utf8.sql new file mode 100644 index 0000000000..cfd9396194 --- /dev/null +++ b/src/test/modules/test_regex/sql/test_regex_utf8.sql @@ -0,0 +1,60 @@ +/* + * This test must be run in a database with UTF-8 encoding, + * because other encodings don't support all the characters used. + */ + +SELECT getdatabaseencoding() <> 'UTF8' + AS skip_test \gset +\if :skip_test +\quit +\endif + +set client_encoding = utf8; + +set standard_conforming_strings = on; + + +-- Run the Tcl test cases that require Unicode + +-- expectMatch 9.44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \ +-- "a\u0102\u02ffb" "a\u0102\u02ffb" +select * from test_regex('a[\u00fe-\u0507][\u00ff-\u0300]b', E'a\u0102\u02ffb', 'EMP*'); + +-- expectMatch 13.27 P "a\\U00001234x" "a\u1234x" "a\u1234x" +select * from test_regex('a\U00001234x', E'a\u1234x', 'P'); +-- expectMatch 13.28 P {a\U00001234x} "a\u1234x" "a\u1234x" +select * from test_regex('a\U00001234x', E'a\u1234x', 'P'); +-- expectMatch 13.29 P "a\\U0001234x" "a\u1234x" "a\u1234x" +-- Tcl has relaxed their code to allow 1-8 hex digits, but Postgres hasn't +select * from test_regex('a\U0001234x', E'a\u1234x', 'P'); +-- expectMatch 13.30 P {a\U0001234x} "a\u1234x" "a\u1234x" +-- Tcl has relaxed their code to allow 1-8 hex digits, but Postgres hasn't +select * from test_regex('a\U0001234x', E'a\u1234x', 'P'); +-- expectMatch 13.31 P "a\\U000012345x" "a\u12345x" "a\u12345x" +select * from test_regex('a\U000012345x', E'a\u12345x', 'P'); +-- expectMatch 13.32 P {a\U000012345x} "a\u12345x" "a\u12345x" +select * from test_regex('a\U000012345x', E'a\u12345x', 'P'); +-- expectMatch 13.33 P "a\\U1000000x" "a\ufffd0x" 
"a\ufffd0x" +-- Tcl allows this as a standalone character, but Postgres doesn't +select * from test_regex('a\U1000000x', E'a\ufffd0x', 'P'); +-- expectMatch 13.34 P {a\U1000000x} "a\ufffd0x" "a\ufffd0x" +-- Tcl allows this as a standalone character, but Postgres doesn't +select * from test_regex('a\U1000000x', E'a\ufffd0x', 'P'); + + +-- Additional tests, not derived from Tcl + +-- Exercise logic around high character ranges a bit more +select * from test_regex('a + [\u1000-\u1100]* + [\u3000-\u3100]* + [\u1234-\u25ff]+ + [\u2000-\u35ff]* + [\u2600-\u2f00]* + \u1236\u1236x', + E'a\u1234\u1236\u1236x', 'xEMP'); + +select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237', + E'\u1500\u1237', 'ELMP'); +select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237', + E'A\u1239', 'ELMP'); diff --git a/src/test/modules/test_regex/test_regex--1.0.sql b/src/test/modules/test_regex/test_regex--1.0.sql new file mode 100644 index 0000000000..7d991537f4 --- /dev/null +++ b/src/test/modules/test_regex/test_regex--1.0.sql @@ -0,0 +1,9 @@ +/* src/test/modules/test_regex/test_regex--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_regex" to load this file. \quit + +CREATE FUNCTION test_regex(pattern text, string text, flags text) +RETURNS SETOF text[] +STRICT +AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_regex/test_regex.c b/src/test/modules/test_regex/test_regex.c new file mode 100644 index 0000000000..ad3c6d3b1a --- /dev/null +++ b/src/test/modules/test_regex/test_regex.c @@ -0,0 +1,759 @@ +/*-------------------------------------------------------------------------- + * + * test_regex.c + * Test harness for the regular expression package. 
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/test/modules/test_regex/test_regex.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "funcapi.h" +#include "miscadmin.h" +#include "regex/regex.h" +#include "utils/array.h" +#include "utils/builtins.h" + +PG_MODULE_MAGIC; + + +/* all the options of interest for regex functions */ +typedef struct test_re_flags +{ + int cflags; /* compile flags for Spencer's regex code */ + int eflags; /* execute flags for Spencer's regex code */ + long info; /* expected re_info bits */ + bool glob; /* do it globally (for each occurrence) */ + bool indices; /* report indices not actual strings */ + bool partial; /* expect partial match */ +} test_re_flags; + +/* cross-call state for test_regex() */ +typedef struct test_regex_ctx +{ + test_re_flags re_flags; /* flags */ + rm_detail_t details; /* "details" from execution */ + text *orig_str; /* data string in original TEXT form */ + int nmatches; /* number of places where pattern matched */ + int npatterns; /* number of capturing subpatterns */ + /* We store start char index and end+1 char index for each match */ + /* so the number of entries in match_locs is nmatches * npatterns * 2 */ + int *match_locs; /* 0-based character indexes */ + int next_match; /* 0-based index of next match to process */ + /* workspace for build_test_match_result() */ + Datum *elems; /* has npatterns+1 elements */ + bool *nulls; /* has npatterns+1 elements */ + pg_wchar *wide_str; /* wide-char version of original string */ + char *conv_buf; /* conversion buffer, if needed */ + int conv_bufsiz; /* size thereof */ +} test_regex_ctx; + +/* Local functions */ +static void test_re_compile(text *text_re, int cflags, Oid collation, + regex_t *result_re); +static void parse_test_flags(test_re_flags *flags, text 
*opts); +static test_regex_ctx *setup_test_matches(text *orig_str, + regex_t *cpattern, + test_re_flags *flags, + Oid collation, + bool use_subpatterns); +static ArrayType *build_test_info_result(regex_t *cpattern, + test_re_flags *flags); +static ArrayType *build_test_match_result(test_regex_ctx *matchctx); + + +/* + * test_regex(pattern text, string text, flags text) returns setof text[] + * + * This is largely based on regexp.c's regexp_matches, with additions + * for debugging purposes. + */ +PG_FUNCTION_INFO_V1(test_regex); + +Datum +test_regex(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + test_regex_ctx *matchctx; + ArrayType *result_ary; + + if (SRF_IS_FIRSTCALL()) + { + text *pattern = PG_GETARG_TEXT_PP(0); + text *flags = PG_GETARG_TEXT_PP(2); + Oid collation = PG_GET_COLLATION(); + test_re_flags re_flags; + regex_t cpattern; + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* Determine options */ + parse_test_flags(&re_flags, flags); + + /* set up the compiled pattern */ + test_re_compile(pattern, re_flags.cflags, collation, &cpattern); + + /* be sure to copy the input string into the multi-call ctx */ + matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern, + &re_flags, + collation, + true); + + /* Pre-create workspace that build_test_match_result needs */ + matchctx->elems = (Datum *) palloc(sizeof(Datum) * + (matchctx->npatterns + 1)); + matchctx->nulls = (bool *) palloc(sizeof(bool) * + (matchctx->npatterns + 1)); + + MemoryContextSwitchTo(oldcontext); + funcctx->user_fctx = (void *) matchctx; + + /* + * Return the first result row, which is info equivalent to Tcl's + * "regexp -about" output + */ + result_ary = build_test_info_result(&cpattern, &re_flags); + + pg_regfree(&cpattern); + + SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary)); + } + else + { + /* Each subsequent row describes one match */ + funcctx = SRF_PERCALL_SETUP(); + matchctx 
= (test_regex_ctx *) funcctx->user_fctx; + + if (matchctx->next_match < matchctx->nmatches) + { + result_ary = build_test_match_result(matchctx); + matchctx->next_match++; + SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary)); + } + } + + SRF_RETURN_DONE(funcctx); +} + + +/* + * test_re_compile - compile a RE + * + * text_re --- the pattern, expressed as a TEXT object + * cflags --- compile options for the pattern + * collation --- collation to use for LC_CTYPE-dependent behavior + * result_re --- output, compiled RE is stored here + * + * Pattern is given in the database encoding. We internally convert to + * an array of pg_wchar, which is what Spencer's regex package wants. + * + * Caller must eventually pg_regfree the resulting RE to avoid memory leaks. + */ +static void +test_re_compile(text *text_re, int cflags, Oid collation, + regex_t *result_re) +{ + int text_re_len = VARSIZE_ANY_EXHDR(text_re); + char *text_re_val = VARDATA_ANY(text_re); + pg_wchar *pattern; + int pattern_len; + int regcomp_result; + char errMsg[100]; + + /* Convert pattern string to wide characters */ + pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar)); + pattern_len = pg_mb2wchar_with_len(text_re_val, + pattern, + text_re_len); + + regcomp_result = pg_regcomp(result_re, + pattern, + pattern_len, + cflags, + collation); + + pfree(pattern); + + if (regcomp_result != REG_OKAY) + { + /* re didn't compile (no need for pg_regfree, if so) */ + + /* + * Here and in other places in this file, do CHECK_FOR_INTERRUPTS + * before reporting a regex error. This is so that if the regex + * library aborts and returns REG_CANCEL, we don't print an error + * message that implies the regex was invalid. 
+ */ + CHECK_FOR_INTERRUPTS(); + + pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("invalid regular expression: %s", errMsg))); + } +} + +/* + * test_re_execute - execute a RE on pg_wchar data + * + * Returns true on match, false on no match + * Arguments are as for pg_regexec + */ +static bool +test_re_execute(regex_t *re, pg_wchar *data, int data_len, + int start_search, + rm_detail_t *details, + int nmatch, regmatch_t *pmatch, + int eflags) +{ + int regexec_result; + char errMsg[100]; + + /* Initialize match locations in case engine doesn't */ + details->rm_extend.rm_so = -1; + details->rm_extend.rm_eo = -1; + for (int i = 0; i < nmatch; i++) + { + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + } + + /* Perform RE match and return result */ + regexec_result = pg_regexec(re, + data, + data_len, + start_search, + details, + nmatch, + pmatch, + eflags); + + if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) + { + /* re failed??? 
*/ + CHECK_FOR_INTERRUPTS(); + pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("regular expression failed: %s", errMsg))); + } + + return (regexec_result == REG_OKAY); +} + + +/* + * parse_test_flags - parse the flags argument + * + * flags --- output argument, filled with desired options + * opts --- TEXT object, or NULL for defaults + */ +static void +parse_test_flags(test_re_flags *flags, text *opts) +{ + /* these defaults must match Tcl's */ + int cflags = REG_ADVANCED; + int eflags = 0; + long info = 0; + + flags->glob = false; + flags->indices = false; + flags->partial = false; + + if (opts) + { + char *opt_p = VARDATA_ANY(opts); + int opt_len = VARSIZE_ANY_EXHDR(opts); + int i; + + for (i = 0; i < opt_len; i++) + { + switch (opt_p[i]) + { + case '-': + /* allowed, no-op */ + break; + case '!': + flags->partial = true; + break; + case '*': + /* test requires Unicode --- ignored here */ + break; + case '0': + flags->indices = true; + break; + + /* These flags correspond to user-exposed RE options: */ + case 'g': /* global match */ + flags->glob = true; + break; + case 'i': /* case insensitive */ + cflags |= REG_ICASE; + break; + case 'n': /* \n affects ^ $ . [^ */ + cflags |= REG_NEWLINE; + break; + case 'p': /* ~Perl, \n affects . [^ */ + cflags |= REG_NLSTOP; + cflags &= ~REG_NLANCH; + break; + case 'w': /* weird, \n affects ^ $ only */ + cflags &= ~REG_NLSTOP; + cflags |= REG_NLANCH; + break; + case 'x': /* expanded syntax */ + cflags |= REG_EXPANDED; + break; + + /* These flags correspond to Tcl's -xflags options: */ + case 'a': + cflags |= REG_ADVF; + break; + case 'b': + cflags &= ~REG_ADVANCED; + break; + case 'c': + + /* + * Tcl calls this TCL_REG_CANMATCH, but it's really + * REG_EXPECT. In this implementation we must also set + * the partial and indices flags, so that + * setup_test_matches and build_test_match_result will + * emit the desired data. 
(They'll emit more fields than + * Tcl would, but that's fine.) + */ + cflags |= REG_EXPECT; + flags->partial = true; + flags->indices = true; + break; + case 'e': + cflags &= ~REG_ADVANCED; + cflags |= REG_EXTENDED; + break; + case 'q': + cflags &= ~REG_ADVANCED; + cflags |= REG_QUOTE; + break; + case 'o': /* o for opaque */ + cflags |= REG_NOSUB; + break; + case 's': /* s for start */ + cflags |= REG_BOSONLY; + break; + case '+': + cflags |= REG_FAKE; + break; + case ',': + cflags |= REG_PROGRESS; + break; + case '.': + cflags |= REG_DUMP; + break; + case ':': + eflags |= REG_MTRACE; + break; + case ';': + eflags |= REG_FTRACE; + break; + case '^': + eflags |= REG_NOTBOL; + break; + case '$': + eflags |= REG_NOTEOL; + break; + case 't': + cflags |= REG_EXPECT; + break; + case '%': + eflags |= REG_SMALL; + break; + + /* These flags define expected info bits: */ + case 'A': + info |= REG_UBSALNUM; + break; + case 'B': + info |= REG_UBRACES; + break; + case 'E': + info |= REG_UBBS; + break; + case 'H': + info |= REG_ULOOKAROUND; + break; + case 'I': + info |= REG_UIMPOSSIBLE; + break; + case 'L': + info |= REG_ULOCALE; + break; + case 'M': + info |= REG_UUNPORT; + break; + case 'N': + info |= REG_UEMPTYMATCH; + break; + case 'P': + info |= REG_UNONPOSIX; + break; + case 'Q': + info |= REG_UBOUNDS; + break; + case 'R': + info |= REG_UBACKREF; + break; + case 'S': + info |= REG_UUNSPEC; + break; + case 'T': + info |= REG_USHORTEST; + break; + case 'U': + info |= REG_UPBOTCH; + break; + + default: + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid regular expression test option: \"%.*s\"", + pg_mblen(opt_p + i), opt_p + i))); + break; + } + } + } + flags->cflags = cflags; + flags->eflags = eflags; + flags->info = info; +} + +/* + * setup_test_matches --- do the initial matching + * + * To simplify memory management, we do all the matching in one swoop. 
+ * The returned test_regex_ctx contains the locations of all the substrings + * matching the pattern. + */ +static test_regex_ctx * +setup_test_matches(text *orig_str, + regex_t *cpattern, test_re_flags *re_flags, + Oid collation, + bool use_subpatterns) +{ + test_regex_ctx *matchctx = palloc0(sizeof(test_regex_ctx)); + int eml = pg_database_encoding_max_length(); + int orig_len; + pg_wchar *wide_str; + int wide_len; + regmatch_t *pmatch; + int pmatch_len; + int array_len; + int array_idx; + int prev_match_end; + int start_search; + int maxlen = 0; /* largest fetch length in characters */ + + /* save flags */ + matchctx->re_flags = *re_flags; + + /* save original string --- we'll extract result substrings from it */ + matchctx->orig_str = orig_str; + + /* convert string to pg_wchar form for matching */ + orig_len = VARSIZE_ANY_EXHDR(orig_str); + wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1)); + wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len); + + /* do we want to remember subpatterns? */ + if (use_subpatterns && cpattern->re_nsub > 0) + { + matchctx->npatterns = cpattern->re_nsub + 1; + pmatch_len = cpattern->re_nsub + 1; + } + else + { + use_subpatterns = false; + matchctx->npatterns = 1; + pmatch_len = 1; + } + + /* temporary output space for RE package */ + pmatch = palloc(sizeof(regmatch_t) * pmatch_len); + + /* + * the real output space (grown dynamically if needed) + * + * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather + * than at 2^27 + */ + array_len = re_flags->glob ? 
255 : 31; + matchctx->match_locs = (int *) palloc(sizeof(int) * array_len); + array_idx = 0; + + /* search for the pattern, perhaps repeatedly */ + prev_match_end = 0; + start_search = 0; + while (test_re_execute(cpattern, wide_str, wide_len, + start_search, + &matchctx->details, + pmatch_len, pmatch, + re_flags->eflags)) + { + /* enlarge output space if needed */ + while (array_idx + matchctx->npatterns * 2 + 1 > array_len) + { + array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */ + if (array_len > MaxAllocSize / sizeof(int)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("too many regular expression matches"))); + matchctx->match_locs = (int *) repalloc(matchctx->match_locs, + sizeof(int) * array_len); + } + + /* save this match's locations */ + for (int i = 0; i < matchctx->npatterns; i++) + { + int so = pmatch[i].rm_so; + int eo = pmatch[i].rm_eo; + + matchctx->match_locs[array_idx++] = so; + matchctx->match_locs[array_idx++] = eo; + if (so >= 0 && eo >= 0 && (eo - so) > maxlen) + maxlen = (eo - so); + } + matchctx->nmatches++; + prev_match_end = pmatch[0].rm_eo; + + /* if not glob, stop after one match */ + if (!re_flags->glob) + break; + + /* + * Advance search position. Normally we start the next search at the + * end of the previous match; but if the match was of zero length, we + * have to advance by one character, or we'd just find the same match + * again. + */ + start_search = prev_match_end; + if (pmatch[0].rm_so == pmatch[0].rm_eo) + start_search++; + if (start_search > wide_len) + break; + } + + /* + * If we had no match, but "partial" and "indices" are set, emit the + * details. 
+ */ + if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices) + { + matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so; + matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo; + /* we don't have pmatch data, so emit -1 */ + for (int i = 1; i < matchctx->npatterns; i++) + { + matchctx->match_locs[array_idx++] = -1; + matchctx->match_locs[array_idx++] = -1; + } + matchctx->nmatches++; + } + + if (eml > 1) + { + int64 maxsiz = eml * (int64) maxlen; + int conv_bufsiz; + + /* + * Make the conversion buffer large enough for any substring of + * interest. + * + * Worst case: assume we need the maximum size (maxlen*eml), but take + * advantage of the fact that the original string length in bytes is + * an upper bound on the byte length of any fetched substring (and we + * know that len+1 is safe to allocate because the varlena header is + * longer than 1 byte). + */ + if (maxsiz > orig_len) + conv_bufsiz = orig_len + 1; + else + conv_bufsiz = maxsiz + 1; /* safe since maxsiz < 2^30 */ + + matchctx->conv_buf = palloc(conv_bufsiz); + matchctx->conv_bufsiz = conv_bufsiz; + matchctx->wide_str = wide_str; + } + else + { + /* No need to keep the wide string if we're in a single-byte charset. */ + pfree(wide_str); + matchctx->wide_str = NULL; + matchctx->conv_buf = NULL; + matchctx->conv_bufsiz = 0; + } + + /* Clean up temp storage */ + pfree(pmatch); + + return matchctx; +} + +/* + * build_test_info_result - build output array describing compiled regexp + * + * This borrows some code from Tcl's TclRegAbout(). 
+ */ +static ArrayType * +build_test_info_result(regex_t *cpattern, test_re_flags *flags) +{ + /* Translation data for flag bits in regex_t.re_info: each row pairs one REG_Uxxx bit with the name reported for it, and a row whose bit is zero ends the table */ + struct infoname + { + int bit; /* bit tested against re_info and flags->info */ + const char *text; /* name emitted in the result array */ + }; + static const struct infoname infonames[] = { + {REG_UBACKREF, "REG_UBACKREF"}, + {REG_ULOOKAROUND, "REG_ULOOKAROUND"}, + {REG_UBOUNDS, "REG_UBOUNDS"}, + {REG_UBRACES, "REG_UBRACES"}, + {REG_UBSALNUM, "REG_UBSALNUM"}, + {REG_UPBOTCH, "REG_UPBOTCH"}, + {REG_UBBS, "REG_UBBS"}, + {REG_UNONPOSIX, "REG_UNONPOSIX"}, + {REG_UUNSPEC, "REG_UUNSPEC"}, + {REG_UUNPORT, "REG_UUNPORT"}, + {REG_ULOCALE, "REG_ULOCALE"}, + {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"}, + {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"}, + {REG_USHORTEST, "REG_USHORTEST"}, + {0, NULL} /* end-of-table marker; stops the loop below */ + }; + const struct infoname *inf; + Datum elems[lengthof(infonames) + 1]; /* the subexpression count plus at most one element per table row, so this cannot overflow */ + int nresults = 0; /* number of elems[] entries filled so far */ + char buf[80]; /* scratch space for formatted element text */ + int dims[1]; + int lbs[1]; + + /* Set up results: first, the number of subexpressions (re_nsub as decimal text) */ + snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub); + elems[nresults++] = PointerGetDatum(cstring_to_text(buf)); + + /* Report individual info bit states: a bit set in re_info is reported by name, or as "unexpected NAME!" when the test flags did not list it; a clear bit that the test flags did list is reported as "missing NAME!"; a clear, unlisted bit adds no element */ + for (inf = infonames; inf->bit != 0; inf++) + { + if (cpattern->re_info & inf->bit) /* the compiled regex has this bit set */ + { + if (flags->info & inf->bit) /* ... and the flags expected it */ + elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text)); + else + { + snprintf(buf, sizeof(buf), "unexpected %s!", inf->text); + elems[nresults++] = PointerGetDatum(cstring_to_text(buf)); + } + } + else + { + if (flags->info & inf->bit) /* expected by the flags, but not set */ + { + snprintf(buf, sizeof(buf), "missing %s!", inf->text); + elems[nresults++] = PointerGetDatum(cstring_to_text(buf)); + } + } + } + + /* And form an array: one dimension of nresults text elements, lower bound 1 */ + dims[0] = nresults; + lbs[0] = 1; + /* XXX: this hardcodes assumptions about the text type */ + return construct_md_array(elems, NULL, 1, dims, lbs, + TEXTOID, -1, false, TYPALIGN_INT); +} + +/* + * build_test_match_result - build output array for current match + * + * Note that if the indices flag is set, we don't need any strings, + * just the 
location data. + */ +static ArrayType * +build_test_match_result(test_regex_ctx *matchctx) +{ + char *buf = matchctx->conv_buf; + Datum *elems = matchctx->elems; + bool *nulls = matchctx->nulls; + bool indices = matchctx->re_flags.indices; + char bufstr[80]; + int dims[1]; + int lbs[1]; + int loc; + int i; + + /* Extract matching substrings from the original string */ + loc = matchctx->next_match * matchctx->npatterns * 2; + for (i = 0; i < matchctx->npatterns; i++) + { + int so = matchctx->match_locs[loc++]; + int eo = matchctx->match_locs[loc++]; + + if (indices) + { + /* Report eo this way for consistency with Tcl */ + snprintf(bufstr, sizeof(bufstr), "%d %d", + so, so < 0 ? eo : eo - 1); + elems[i] = PointerGetDatum(cstring_to_text(bufstr)); + nulls[i] = false; + } + else if (so < 0 || eo < 0) + { + elems[i] = (Datum) 0; + nulls[i] = true; + } + else if (buf) + { + int len = pg_wchar2mb_with_len(matchctx->wide_str + so, + buf, + eo - so); + + Assert(len < matchctx->conv_bufsiz); + elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len)); + nulls[i] = false; + } + else + { + elems[i] = DirectFunctionCall3(text_substr, + PointerGetDatum(matchctx->orig_str), + Int32GetDatum(so + 1), + Int32GetDatum(eo - so)); + nulls[i] = false; + } + } + + /* In EXPECT indices mode, also report the "details" */ + if (indices && (matchctx->re_flags.cflags & REG_EXPECT)) + { + int so = matchctx->details.rm_extend.rm_so; + int eo = matchctx->details.rm_extend.rm_eo; + + snprintf(bufstr, sizeof(bufstr), "%d %d", + so, so < 0 ? 
eo : eo - 1); + elems[i] = PointerGetDatum(cstring_to_text(bufstr)); + nulls[i] = false; + i++; + } + + /* And form an array */ + dims[0] = i; + lbs[0] = 1; + /* XXX: this hardcodes assumptions about the text type */ + return construct_md_array(elems, nulls, 1, dims, lbs, + TEXTOID, -1, false, TYPALIGN_INT); +} diff --git a/src/test/modules/test_regex/test_regex.control b/src/test/modules/test_regex/test_regex.control new file mode 100644 index 0000000000..bfce1009cc --- /dev/null +++ b/src/test/modules/test_regex/test_regex.control @@ -0,0 +1,4 @@ +comment = 'Test code for backend/regex/' +default_version = '1.0' +module_pathname = '$libdir/test_regex' +relocatable = true