diff --git a/src/backend/regex/README b/src/backend/regex/README index a83ab5074d..cafeb3dffb 100644 --- a/src/backend/regex/README +++ b/src/backend/regex/README @@ -129,9 +129,9 @@ If not, we can reject the match immediately without iterating through many possibilities. As an example, consider the regex "(a[bc]+)\1". The compiled -representation will have a top-level concatenation subre node. Its left +representation will have a top-level concatenation subre node. Its first child is a capture node, and the child of that is a plain DFA node for -"a[bc]+". The concatenation's right child is a backref node for \1. +"a[bc]+". The concatenation's second child is a backref node for \1. The DFA associated with the concatenation node will be "a[bc]+a[bc]+", where the backref has been replaced by a copy of the DFA for its referent expression. When executed, the concatenation node will have to search for diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 0182e02db1..c0680b280b 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -58,6 +58,7 @@ static void processlacon(struct vars *, struct state *, struct state *, int, struct state *, struct state *); static struct subre *subre(struct vars *, int, int, struct state *, struct state *); static void freesubre(struct vars *, struct subre *); +static void freesubreandsiblings(struct vars *, struct subre *); static void freesrnode(struct vars *, struct subre *); static void optst(struct vars *, struct subre *); static int numst(struct subre *, int); @@ -652,8 +653,8 @@ makesearch(struct vars *v, * parse - parse an RE * * This is actually just the top level, which parses a bunch of branches - * tied together with '|'. They appear in the tree as the left children - * of a chain of '|' subres. + * tied together with '|'. If there's more than one, they appear in the + * tree as the children of a '|' subre. */ static struct subre * parse(struct vars *v, @@ -662,41 +663,34 @@ parse(struct vars *v, struct state *init, /* initial state */ struct state *final) /* final state */ { - struct state *left; /* scaffolding for branch */ - struct state *right; struct subre *branches; /* top level */ - struct subre *branch; /* current branch */ - struct subre *t; /* temporary */ - int firstbranch; /* is this the first branch? */ + struct subre *lastbranch; /* latest branch */ assert(stopper == ')' || stopper == EOS); branches = subre(v, '|', LONGER, init, final); NOERRN(); - branch = branches; - firstbranch = 1; + lastbranch = NULL; do { /* a branch */ - if (!firstbranch) - { - /* need a place to hang it */ - branch->right = subre(v, '|', LONGER, init, final); - NOERRN(); - branch = branch->right; - } - firstbranch = 0; + struct subre *branch; + struct state *left; /* scaffolding for branch */ + struct state *right; + left = newstate(v->nfa); right = newstate(v->nfa); NOERRN(); EMPTYARC(init, left); EMPTYARC(right, final); NOERRN(); - branch->left = parsebranch(v, stopper, type, left, right, 0); + branch = parsebranch(v, stopper, type, left, right, 0); NOERRN(); - branch->flags |= UP(branch->flags | branch->left->flags); - if ((branch->flags & ~branches->flags) != 0) /* new flags */ - for (t = branches; t != branch; t = t->right) - t->flags |= branch->flags; + if (lastbranch) + lastbranch->sibling = branch; + else + branches->child = branch; + branches->flags |= UP(branches->flags | branch->flags); + lastbranch = branch; } while (EAT('|')); assert(SEE(stopper) || SEE(EOS)); @@ -707,20 +701,16 @@ parse(struct vars *v, } /* optimize out simple cases */ - if (branch == branches) + if (lastbranch == branches->child) { /* only one branch */ - assert(branch->right == NULL); - t = branch->left; - branch->left = NULL; - freesubre(v, branches); - branches = t; + assert(lastbranch->sibling == NULL); + freesrnode(v, branches); + branches = lastbranch; } else if (!MESSY(branches->flags)) { /* no interesting innards */ - freesubre(v, branches->left); - branches->left = NULL; - freesubre(v, branches->right); - branches->right = NULL; + freesubreandsiblings(v, branches->child); + branches->child = NULL; branches->op = '='; } @@ -972,7 +962,7 @@ parseqatom(struct vars *v, t = subre(v, '(', atom->flags | CAP, lp, rp); NOERR(); t->subno = subno; - t->left = atom; + t->child = atom; atom = t; } /* postpone everything else pending possible {0} */ @@ -1120,26 +1110,27 @@ parseqatom(struct vars *v, /* break remaining subRE into x{...} and what follows */ t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp); NOERR(); - t->left = atom; - atomp = &t->left; + t->child = atom; + atomp = &t->child; /* - * Here we should recurse to fill t->right ... but we must postpone that - * to the end. + * Here we should recurse to fill t->child->sibling ... but we must + * postpone that to the end. One reason is that t->child may be replaced + * below, and we don't want to worry about its sibling link. */ /* - * Convert top node to a concatenation of the prefix (top->left, covering + * Convert top node to a concatenation of the prefix (top->child, covering * whatever we parsed previously) and remaining (t). Note that the prefix * could be empty, in which case this concatenation node is unnecessary. * To keep things simple, we operate in a general way for now, and get rid * of unnecessary subres below. */ - assert(top->op == '=' && top->left == NULL && top->right == NULL); - top->left = subre(v, '=', top->flags, top->begin, lp); + assert(top->op == '=' && top->child == NULL); + top->child = subre(v, '=', top->flags, top->begin, lp); NOERR(); top->op = '.'; - top->right = t; + top->child->sibling = t; /* top->flags will get updated later */ /* if it's a backref, now is the time to replicate the subNFA */ @@ -1201,9 +1192,9 @@ parseqatom(struct vars *v, f = COMBINE(qprefer, atom->flags); t = subre(v, '.', f, s, atom->end); /* prefix and atom */ NOERR(); - t->left = subre(v, '=', PREF(f), s, atom->begin); + t->child = subre(v, '=', PREF(f), s, atom->begin); NOERR(); - t->right = atom; + t->child->sibling = atom; *atomp = t; /* rest of branch can be strung starting from atom->end */ s2 = atom->end; @@ -1222,44 +1213,43 @@ parseqatom(struct vars *v, NOERR(); t->min = (short) m; t->max = (short) n; - t->left = atom; + t->child = atom; *atomp = t; /* rest of branch is to be strung from iteration's end state */ } /* and finally, look after that postponed recursion */ - t = top->right; + t = top->child->sibling; if (!(SEE('|') || SEE(stopper) || SEE(EOS))) { - /* parse all the rest of the branch, and insert in t->right */ - t->right = parsebranch(v, stopper, type, s2, rp, 1); + /* parse all the rest of the branch, and insert in t->child->sibling */ + t->child->sibling = parsebranch(v, stopper, type, s2, rp, 1); NOERR(); assert(SEE('|') || SEE(stopper) || SEE(EOS)); /* here's the promised update of the flags */ - t->flags |= COMBINE(t->flags, t->right->flags); + t->flags |= COMBINE(t->flags, t->child->sibling->flags); top->flags |= COMBINE(top->flags, t->flags); /* * At this point both top and t are concatenation (op == '.') subres, - * and we have top->left = prefix of branch, top->right = t, t->left = - * messy atom (with quantification superstructure if needed), t->right - * = rest of branch. + * and we have top->child = prefix of branch, top->child->sibling = t, + * t->child = messy atom (with quantification superstructure if + * needed), t->child->sibling = rest of branch. * - * If the messy atom was the first thing in the branch, then top->left - * is vacuous and we can get rid of one level of concatenation. Since - * the caller is holding a pointer to the top node, we can't remove - * that node; but we're allowed to change its properties. + * If the messy atom was the first thing in the branch, then + * top->child is vacuous and we can get rid of one level of + * concatenation. Since the caller is holding a pointer to the top + * node, we can't remove that node; but we're allowed to change its + * properties. */ - assert(top->left->op == '='); - if (top->left->begin == top->left->end) + assert(top->child->op == '='); + if (top->child->begin == top->child->end) { - assert(!MESSY(top->left->flags)); - freesubre(v, top->left); - top->left = t->left; - top->right = t->right; - t->left = t->right = NULL; - freesubre(v, t); + assert(!MESSY(top->child->flags)); + freesubre(v, top->child); + top->child = t->child; + freesrnode(v, t); } } else @@ -1269,34 +1259,31 @@ parseqatom(struct vars *v, * concatenation node 't'. Just link s2 straight to rp. */ EMPTYARC(s2, rp); - top->right = t->left; - top->flags |= COMBINE(top->flags, top->right->flags); - t->left = t->right = NULL; - freesubre(v, t); + top->child->sibling = t->child; + top->flags |= COMBINE(top->flags, top->child->sibling->flags); + freesrnode(v, t); /* - * Again, it could be that top->left is vacuous (if the messy atom was - * in fact the only thing in the branch). In that case we need no - * concatenation at all; just replace top with top->right. + * Again, it could be that top->child is vacuous (if the messy atom + * was in fact the only thing in the branch). In that case we need no + * concatenation at all; just replace top with top->child->sibling. */ - assert(top->left->op == '='); - if (top->left->begin == top->left->end) + assert(top->child->op == '='); + if (top->child->begin == top->child->end) { - assert(!MESSY(top->left->flags)); - freesubre(v, top->left); - t = top->right; + assert(!MESSY(top->child->flags)); + t = top->child->sibling; + freesubre(v, top->child); top->op = t->op; top->flags = t->flags; top->id = t->id; top->subno = t->subno; top->min = t->min; top->max = t->max; - top->left = t->left; - top->right = t->right; + top->child = t->child; top->begin = t->begin; top->end = t->end; - t->left = t->right = NULL; - freesubre(v, t); + freesrnode(v, t); } } } @@ -1786,7 +1773,7 @@ subre(struct vars *v, } if (ret != NULL) - v->treefree = ret->left; + v->treefree = ret->child; else { ret = (struct subre *) MALLOC(sizeof(struct subre)); @@ -1806,8 +1793,8 @@ subre(struct vars *v, ret->id = 0; /* will be assigned later */ ret->subno = 0; ret->min = ret->max = 1; - ret->left = NULL; - ret->right = NULL; + ret->child = NULL; + ret->sibling = NULL; ret->begin = begin; ret->end = end; ZAPCNFA(ret->cnfa); @@ -1817,6 +1804,9 @@ subre(struct vars *v, /* * freesubre - free a subRE subtree + * + * This frees child node(s) of the given subRE too, + * but not its siblings. */ static void freesubre(struct vars *v, /* might be NULL */ @@ -1825,14 +1815,31 @@ freesubre(struct vars *v, /* might be NULL */ if (sr == NULL) return; - if (sr->left != NULL) - freesubre(v, sr->left); - if (sr->right != NULL) - freesubre(v, sr->right); + if (sr->child != NULL) + freesubreandsiblings(v, sr->child); freesrnode(v, sr); } +/* + * freesubreandsiblings - free a subRE subtree + * + * This frees child node(s) of the given subRE too, + * as well as any following siblings. + */ +static void +freesubreandsiblings(struct vars *v, /* might be NULL */ + struct subre *sr) +{ + while (sr != NULL) + { + struct subre *next = sr->sibling; + + freesubre(v, sr); + sr = next; + } +} + /* * freesrnode - free one node in a subRE subtree */ @@ -1850,7 +1857,7 @@ freesrnode(struct vars *v, /* might be NULL */ if (v != NULL && v->treechain != NULL) { /* we're still parsing, maybe we can reuse the subre */ - sr->left = v->treefree; + sr->child = v->treefree; v->treefree = sr; } else @@ -1881,15 +1888,14 @@ numst(struct subre *t, int start) /* starting point for subtree numbers */ { int i; + struct subre *t2; assert(t != NULL); i = start; t->id = (short) i++; - if (t->left != NULL) - i = numst(t->left, i); - if (t->right != NULL) - i = numst(t->right, i); + for (t2 = t->child; t2 != NULL; t2 = t2->sibling) + i = numst(t2, i); return i; } @@ -1913,13 +1919,13 @@ numst(struct subre *t, static void markst(struct subre *t) { + struct subre *t2; + assert(t != NULL); t->flags |= INUSE; - if (t->left != NULL) - markst(t->left); - if (t->right != NULL) - markst(t->right); + for (t2 = t->child; t2 != NULL; t2 = t2->sibling) + markst(t2); } /* @@ -1949,12 +1955,12 @@ nfatree(struct vars *v, struct subre *t, FILE *f) /* for debug output */ { + struct subre *t2; + assert(t != NULL && t->begin != NULL); - if (t->left != NULL) - (DISCARD) nfatree(v, t->left, f); - if (t->right != NULL) - (DISCARD) nfatree(v, t->right, f); + for (t2 = t->child; t2 != NULL; t2 = t2->sibling) + (DISCARD) nfatree(v, t2, f); return nfanode(v, t, 0, f); } @@ -2206,6 +2212,7 @@ stdump(struct subre *t, int nfapresent) /* is the original NFA still around? */ { char idbuf[50]; + struct subre *t2; fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op); if (t->flags & LONGER) @@ -2231,20 +2238,21 @@ stdump(struct subre *t, } if (nfapresent) fprintf(f, " %ld-%ld", (long) t->begin->no, (long) t->end->no); - if (t->left != NULL) - fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf))); - if (t->right != NULL) - fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf))); + if (t->child != NULL) + fprintf(f, " C:%s", stid(t->child, idbuf, sizeof(idbuf))); + /* printing second child isn't necessary, but it is often helpful */ + if (t->child != NULL && t->child->sibling != NULL) + fprintf(f, " C2:%s", stid(t->child->sibling, idbuf, sizeof(idbuf))); + if (t->sibling != NULL) + fprintf(f, " S:%s", stid(t->sibling, idbuf, sizeof(idbuf))); if (!NULLCNFA(t->cnfa)) { fprintf(f, "\n"); dumpcnfa(&t->cnfa, f); } fprintf(f, "\n"); - if (t->left != NULL) - stdump(t->left, f, nfapresent); - if (t->right != NULL) - stdump(t->right, f, nfapresent); + for (t2 = t->child; t2 != NULL; t2 = t2->sibling) + stdump(t2, f, nfapresent); } /* diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index adcbcc0a8e..4541cf9a7e 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -640,6 +640,8 @@ static void zaptreesubs(struct vars *v, struct subre *t) { + struct subre *t2; + if (t->op == '(') { int n = t->subno; @@ -652,10 +654,8 @@ zaptreesubs(struct vars *v, } } - if (t->left != NULL) - zaptreesubs(v, t->left); - if (t->right != NULL) - zaptreesubs(v, t->right); + for (t2 = t->child; t2 != NULL; t2 = t2->sibling) + zaptreesubs(v, t2); } /* @@ -714,35 +714,35 @@ cdissect(struct vars *v, switch (t->op) { case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); + assert(t->child == NULL); er = REG_OKAY; /* no action, parent did the work */ break; case 'b': /* back reference */ - assert(t->left == NULL && t->right == NULL); + assert(t->child == NULL); er = cbrdissect(v, t, begin, end); break; case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - if (t->left->flags & SHORTER) /* reverse scan */ + assert(t->child != NULL); + if (t->child->flags & SHORTER) /* reverse scan */ er = crevcondissect(v, t, begin, end); else er = ccondissect(v, t, begin, end); break; case '|': /* alternation */ - assert(t->left != NULL); + assert(t->child != NULL); er = caltdissect(v, t, begin, end); break; case '*': /* iteration */ - assert(t->left != NULL); - if (t->left->flags & SHORTER) /* reverse scan */ + assert(t->child != NULL); + if (t->child->flags & SHORTER) /* reverse scan */ er = creviterdissect(v, t, begin, end); else er = citerdissect(v, t, begin, end); break; case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); + assert(t->child != NULL); assert(t->subno > 0); - er = cdissect(v, t->left, begin, end); + er = cdissect(v, t->child, begin, end); if (er == REG_OKAY) subset(v, t, begin, end); break; @@ -770,19 +770,22 @@ ccondissect(struct vars *v, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { + struct subre *left = t->child; + struct subre *right = left->sibling; struct dfa *d; struct dfa *d2; chr *mid; int er; assert(t->op == '.'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(t->right != NULL && t->right->cnfa.nstates > 0); - assert(!(t->left->flags & SHORTER)); + assert(left != NULL && left->cnfa.nstates > 0); + assert(right != NULL && right->cnfa.nstates > 0); + assert(right->sibling == NULL); + assert(!(left->flags & SHORTER)); - d = getsubdfa(v, t->left); + d = getsubdfa(v, left); NOERR(); - d2 = getsubdfa(v, t->right); + d2 = getsubdfa(v, right); NOERR(); MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); @@ -799,10 +802,10 @@ ccondissect(struct vars *v, /* try this midpoint on for size */ if (longest(v, d2, mid, end, (int *) NULL) == end) { - er = cdissect(v, t->left, begin, mid); + er = cdissect(v, left, begin, mid); if (er == REG_OKAY) { - er = cdissect(v, t->right, mid, end); + er = cdissect(v, right, mid, end); if (er == REG_OKAY) { /* satisfaction */ @@ -831,8 +834,8 @@ ccondissect(struct vars *v, return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); - zaptreesubs(v, t->left); - zaptreesubs(v, t->right); + zaptreesubs(v, left); + zaptreesubs(v, right); } /* can't get here */ @@ -848,19 +851,22 @@ crevcondissect(struct vars *v, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { + struct subre *left = t->child; + struct subre *right = left->sibling; struct dfa *d; struct dfa *d2; chr *mid; int er; assert(t->op == '.'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(t->right != NULL && t->right->cnfa.nstates > 0); - assert(t->left->flags & SHORTER); + assert(left != NULL && left->cnfa.nstates > 0); + assert(right != NULL && right->cnfa.nstates > 0); + assert(right->sibling == NULL); + assert(left->flags & SHORTER); - d = getsubdfa(v, t->left); + d = getsubdfa(v, left); NOERR(); - d2 = getsubdfa(v, t->right); + d2 = getsubdfa(v, right); NOERR(); MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); @@ -877,10 +883,10 @@ crevcondissect(struct vars *v, /* try this midpoint on for size */ if (longest(v, d2, mid, end, (int *) NULL) == end) { - er = cdissect(v, t->left, begin, mid); + er = cdissect(v, left, begin, mid); if (er == REG_OKAY) { - er = cdissect(v, t->right, mid, end); + er = cdissect(v, right, mid, end); if (er == REG_OKAY) { /* satisfaction */ @@ -909,8 +915,8 @@ crevcondissect(struct vars *v, return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); - zaptreesubs(v, t->left); - zaptreesubs(v, t->right); + zaptreesubs(v, left); + zaptreesubs(v, right); } /* can't get here */ @@ -1011,26 +1017,30 @@ caltdissect(struct vars *v, struct dfa *d; int er; - /* We loop, rather than tail-recurse, to handle a chain of alternatives */ + assert(t->op == '|'); + + t = t->child; + /* there should be at least 2 alternatives */ + assert(t != NULL && t->sibling != NULL); + while (t != NULL) { - assert(t->op == '|'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); + assert(t->cnfa.nstates > 0); MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); - d = getsubdfa(v, t->left); + d = getsubdfa(v, t); NOERR(); if (longest(v, d, begin, end, (int *) NULL) == end) { MDEBUG(("%d: caltdissect matched\n", t->id)); - er = cdissect(v, t->left, begin, end); + er = cdissect(v, t, begin, end); if (er != REG_NOMATCH) return er; } NOERR(); - t = t->right; + t = t->sibling; } return REG_NOMATCH; @@ -1056,8 +1066,8 @@ citerdissect(struct vars *v, int er; assert(t->op == '*'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(!(t->left->flags & SHORTER)); + assert(t->child != NULL && t->child->cnfa.nstates > 0); + assert(!(t->child->flags & SHORTER)); assert(begin <= end); MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); @@ -1094,7 +1104,7 @@ citerdissect(struct vars *v, return REG_ESPACE; endpts[0] = begin; - d = getsubdfa(v, t->left); + d = getsubdfa(v, t->child); if (ISERR()) { FREE(endpts); @@ -1172,8 +1182,8 @@ citerdissect(struct vars *v, for (i = nverified + 1; i <= k; i++) { - zaptreesubs(v, t->left); - er = cdissect(v, t->left, endpts[i - 1], endpts[i]); + zaptreesubs(v, t->child); + er = cdissect(v, t->child, endpts[i - 1], endpts[i]); if (er == REG_OKAY) { nverified = i; @@ -1258,8 +1268,8 @@ creviterdissect(struct vars *v, int er; assert(t->op == '*'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(t->left->flags & SHORTER); + assert(t->child != NULL && t->child->cnfa.nstates > 0); + assert(t->child->flags & SHORTER); assert(begin <= end); MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); @@ -1299,7 +1309,7 @@ creviterdissect(struct vars *v, return REG_ESPACE; endpts[0] = begin; - d = getsubdfa(v, t->left); + d = getsubdfa(v, t->child); if (ISERR()) { FREE(endpts); @@ -1383,8 +1393,8 @@ creviterdissect(struct vars *v, for (i = nverified + 1; i <= k; i++) { - zaptreesubs(v, t->left); - er = cdissect(v, t->left, endpts[i - 1], endpts[i]); + zaptreesubs(v, t->child); + er = cdissect(v, t->child, endpts[i - 1], endpts[i]); if (er == REG_OKAY) { nverified = i; diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index 82e761bfe5..90ee16957a 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -423,15 +423,17 @@ struct cnfa * '=' plain regex without interesting substructure (implemented as DFA) * 'b' back-reference (has no substructure either) * '(' capture node: captures the match of its single child - * '.' concatenation: matches a match for left, then a match for right - * '|' alternation: matches a match for left or a match for right + * '.' concatenation: matches a match for first child, then second child + * '|' alternation: matches a match for any of its children * '*' iteration: matches some number of matches of its single child * - * Note: the right child of an alternation must be another alternation or - * NULL; hence, an N-way branch requires N alternation nodes, not N-1 as you - * might expect. This could stand to be changed. Actually I'd rather see - * a single alternation node with N children, but that will take revising - * the representation of struct subre. + * An alternation node can have any number of children (but at least two), + * linked through their sibling fields. + * + * A concatenation node must have exactly two children. It might be useful + * to support more, but that would complicate the executor. Note that it is + * the first child's greediness that determines the node's preference for + * where to split a match. * * Note: when a backref is directly quantified, we stick the min/max counts * into the backref rather than plastering an iteration node on top. This is @@ -460,8 +462,8 @@ struct subre * LATYPE code for lookaround constraint */ short min; /* min repetitions for iteration or backref */ short max; /* max repetitions for iteration or backref */ - struct subre *left; /* left child, if any (also freelist chain) */ - struct subre *right; /* right child, if any */ + struct subre *child; /* first child, if any (also freelist chain) */ + struct subre *sibling; /* next child of same parent, if any */ struct state *begin; /* outarcs from here... */ struct state *end; /* ...ending in inarcs here */ struct cnfa cnfa; /* compacted NFA, if any */