URI: 
       tlibregexp: fix ambiguous match selection - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit a7511dd43d9afe8025a6d7bd2fcccf8f594a6f2b
   DIR parent 6c6117397fb186253bd8754ecfd4786d1d1371f6
  HTML Author: Russ Cox <rsc@swtch.com>
       Date:   Fri,  7 Dec 2007 15:32:45 -0500
       
       libregexp: fix ambiguous match selection
       
               echo SYSSYSR1 | sed 's/SYS.+/sysr1/'
       
       was producing SYSsysr1 instead of sysr1.
       Bug was introduced during overflow cleanup earlier this year.
       
       Also bring regexec.c and rregexec.c into sync again.
       Also allocate large enough lists in the regexec2/rregexec2 case.
       
       Diffstat:
         M src/libregexp/regaux.c              |     105 +++++++++++++++----------------
         M src/libregexp/regcomp.c             |      10 +++++++---
         M src/libregexp/regcomp.h             |       6 +++---
         M src/libregexp/regexec.c             |      48 +++++++++++++------------------
         M src/libregexp/rregexec.c            |      61 ++++++++++++++++++-------------
       
       5 files changed, 117 insertions(+), 113 deletions(-)
       ---
   DIR diff --git a/src/libregexp/regaux.c b/src/libregexp/regaux.c
       t@@ -23,90 +23,89 @@ _renewmatch(Resub *mp, int ms, Resublist *sp)
        }
        
        /*
       - * Note optimization in _renewthread:
       - *         *lp must be pending when _renewthread called; if *l has been looked
       - *                at already, the optimization is a bug.
       + * Add ip to the list [lp, elp], but only if it is not there already.
       + * These work lists are stored and processed in increasing
       + * order of sp[0], so if the ip is there already, the one that's
       + * there already is a more left match and takes priority.
         */
       -extern Relist*
       -_renewthread(Relist *lp,        /* _relist to add to */
       +static Relist*
       +_renewthread1(Relist *lp,        /* Relist to add to */
       +        Relist *elp,                /* limit pointer for Relist */
                Reinst *ip,                /* instruction to add */
                int ms,
                Resublist *sep)                /* pointers to subexpressions */
        {
                Relist *p;
        
       -        for(p=lp; p->inst; p++){
       -                if(p->inst == ip){
       -                        if(sep->m[0].s.sp < p->se.m[0].s.sp){
       -                                if(ms > 1)
       -                                        p->se = *sep;
       -                                else
       -                                        p->se.m[0] = sep->m[0];
       -                        }
       +        for(p=lp; p->inst; p++)
       +                if(p->inst == ip)
                                return 0;
       -                }
       -        }
       +        
       +        if(p == elp)        /* refuse to overflow buffer */
       +                return elp;
       +
                p->inst = ip;
                if(ms > 1)
                        p->se = *sep;
                else
                        p->se.m[0] = sep->m[0];
       -        (++p)->inst = 0;
       +        (p+1)->inst = 0;
                return p;
        }
        
       +extern int
       +_renewthread(Relist *lp, Relist *elp, Reinst *ip, int ms, Resublist *sep)
       +{
       +        Relist *ap;
       +
       +        ap = _renewthread1(lp, elp, ip, ms, sep);
       +        if(ap == 0)
       +                return 0;
       +        if(ap == elp)
       +                return -1;
       +
       +        /*
       +         * Added ip to list at ap.  
       +         * Expand any ORs right now, so that entire
       +         * work list ends up being sorted by increasing m[0].sp.
       +         */
       +        for(; ap->inst; ap++){
       +                if(ap->inst->type == OR){
       +                        if(_renewthread1(lp, elp, ap->inst->u1.right, ms, &ap->se) == elp)
       +                                return -1;
       +                        if(_renewthread1(lp, elp, ap->inst->u2.next, ms, &ap->se) == elp)
       +                                return -1;
       +                }
       +        }
       +        return 0;
       +}
       +
        /*
         * same as renewthread, but called with
         * initial empty start pointer.
         */
       -extern Relist*
       +extern int
        _renewemptythread(Relist *lp,        /* _relist to add to */
       +        Relist *elp,
                Reinst *ip,                /* instruction to add */
                int ms,
                char *sp)                /* pointers to subexpressions */
        {
       -        Relist *p;
       -
       -        for(p=lp; p->inst; p++){
       -                if(p->inst == ip){
       -                        if(sp < p->se.m[0].s.sp) {
       -                                if(ms > 1)
       -                                        memset(&p->se, 0, sizeof(p->se));
       -                                p->se.m[0].s.sp = sp;
       -                        }
       -                        return 0;
       -                }
       -        }
       -        p->inst = ip;
       +        Resublist sep;
       +        
                if(ms > 1)
       -                memset(&p->se, 0, sizeof(p->se));
       -        p->se.m[0].s.sp = sp;
       -        (++p)->inst = 0;
       -        return p;
       +                memset(&sep, 0, sizeof sep);
       +        sep.m[0].s.sp = sp;
       +        sep.m[0].e.ep = 0;
       +        return _renewthread(lp, elp, ip, ms, &sep);
        }
        
       -extern Relist*
       +extern int
        _rrenewemptythread(Relist *lp,        /* _relist to add to */
       +        Relist *elp,
                Reinst *ip,                /* instruction to add */
                int ms,
                Rune *rsp)                /* pointers to subexpressions */
        {
       -        Relist *p;
       -
       -        for(p=lp; p->inst; p++){
       -                if(p->inst == ip){
       -                        if(rsp < p->se.m[0].s.rsp) {
       -                                if(ms > 1)
       -                                        memset(&p->se, 0, sizeof(p->se));
       -                                p->se.m[0].s.rsp = rsp;
       -                        }
       -                        return 0;
       -                }
       -        }
       -        p->inst = ip;
       -        if(ms > 1)
       -                memset(&p->se, 0, sizeof(p->se));
       -        p->se.m[0].s.rsp = rsp;
       -        (++p)->inst = 0;
       -        return p;
       +        return _renewemptythread(lp, elp, ip, ms, (char*)rsp);
        }
   DIR diff --git a/src/libregexp/regcomp.c b/src/libregexp/regcomp.c
       t@@ -232,7 +232,7 @@ optimize(Reprog *pp)
                int size;
                Reprog *npp;
                Reclass *cl;
       -        int diff;
       +        int diff, proglen;
        
                /*
                 *  get rid of NOOP chains
       t@@ -249,10 +249,13 @@ optimize(Reprog *pp)
                 *  necessary.  Reallocate to the actual space used
                 *  and then relocate the code.
                 */
       -        size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
       +        proglen = freep - pp->firstinst;
       +        size = sizeof(Reprog) + proglen*sizeof(Reinst);
                npp = realloc(pp, size);
       -        if(npp==0 || npp==pp)
       +        if(npp==0 || npp==pp){
       +                pp->proglen = proglen;
                        return pp;
       +        }
                diff = (char *)npp - (char *)pp;
                freep = (Reinst *)((char *)freep + diff);
                for(inst=npp->firstinst; inst<freep; inst++){
       t@@ -273,6 +276,7 @@ optimize(Reprog *pp)
                        *(char**)(void*)&inst->u2.left += diff;
                }
                *(char**)(void*)&npp->startinst += diff;
       +        npp->proglen = proglen;
                return npp;
        }
        
   DIR diff --git a/src/libregexp/regcomp.h b/src/libregexp/regcomp.h
       t@@ -68,7 +68,7 @@ struct        Reljunk
                Rune*        reol;
        };
        
       -extern Relist*        _renewthread(Relist*, Reinst*, int, Resublist*);
       +extern int        _renewthread(Relist*, Relist*, Reinst*, int, Resublist*);
        extern void        _renewmatch(Resub*, int, Resublist*);
       -extern Relist*        _renewemptythread(Relist*, Reinst*, int, char*);
       -extern Relist*        _rrenewemptythread(Relist*, Reinst*, int, Rune*);
       +extern int        _renewemptythread(Relist*, Relist*, Reinst*, int, char*);
       +extern int        _rrenewemptythread(Relist*, Relist*, Reinst*, int, Rune*);
   DIR diff --git a/src/libregexp/regexec.c b/src/libregexp/regexec.c
       t@@ -2,7 +2,6 @@
        #include "regexp9.h"
        #include "regcomp.h"
        
       -
        /*
         *  return        0 if no match
         *                >0 if a match
       t@@ -13,16 +12,14 @@ regexec1(Reprog *progp,        /* program to run */
                char *bol,        /* string to run machine on */
                Resub *mp,        /* subexpression elements */
                int ms,                /* number of elements at mp */
       -        Reljunk *j
       -)
       +        Reljunk *j)
        {
                int flag=0;
                Reinst *inst;
                Relist *tlp;
                char *s;
       -        int i, checkstart;
       +        int i, checkstart, n;
                Rune r, *rp, *ep;
       -        int n;
                Relist* tl;                /* This list, next list */
                Relist* nl;
                Relist* tle;                /* ends of this and next list */
       t@@ -48,7 +45,7 @@ regexec1(Reprog *progp,        /* program to run */
                                switch(j->starttype) {
                                case RUNE:
                                        p = utfrune(s, j->startchar);
       -                                if(p == 0 || s == j->eol)
       +                                if(p == 0 || (j->eol && p >= j->eol))
                                                return match;
                                        s = p;
                                        break;
       t@@ -56,7 +53,7 @@ regexec1(Reprog *progp,        /* program to run */
                                        if(s == bol)
                                                break;
                                        p = utfrune(s, '\n');
       -                                if(p == 0 || s == j->eol)
       +                                if(p == 0 || (j->eol && p+1 >= j->eol))
                                                return match;
                                        s = p+1;
                                        break;
       t@@ -77,17 +74,16 @@ regexec1(Reprog *progp,        /* program to run */
        
                        /* Add first instruction to current list */
                        if(match == 0)
       -                        _renewemptythread(tl, progp->startinst, ms, s);
       +                        _renewemptythread(tl, tle, progp->startinst, ms, s);
        
                        /* Execute machine until current list is empty */
       -                for(tlp=tl; tlp->inst; tlp++){        /* assignment = */
       +                for(tlp=tl; tlp->inst; tlp++){
                                for(inst = tlp->inst; ; inst = inst->u2.next){
                                        switch(inst->type){
                                        case RUNE:        /* regular character */
       -                                        if(inst->u1.r == r){
       -                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
       +                                        if(inst->u1.r == r)
       +                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
                                                                return -1;
       -                                        }
                                                break;
                                        case LBRA:
                                                tlp->se.m[inst->u1.subid].s.sp = s;
       t@@ -97,11 +93,11 @@ regexec1(Reprog *progp,        /* program to run */
                                                continue;
                                        case ANY:
                                                if(r != '\n')
       -                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
       +                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
                                                                return -1;
                                                break;
                                        case ANYNL:
       -                                        if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
       +                                        if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
                                                                return -1;
                                                break;
                                        case BOL:
       t@@ -116,7 +112,7 @@ regexec1(Reprog *progp,        /* program to run */
                                                ep = inst->u1.cp->end;
                                                for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
                                                        if(r >= rp[0] && r <= rp[1]){
       -                                                        if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
       +                                                        if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
                                                                        return -1;
                                                                break;
                                                        }
       t@@ -127,15 +123,12 @@ regexec1(Reprog *progp,        /* program to run */
                                                        if(r >= rp[0] && r <= rp[1])
                                                                break;
                                                if(rp == ep)
       -                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
       +                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
                                                                return -1;
                                                break;
                                        case OR:
       -                                        /* evaluate right choice later */
       -                                        if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
       -                                                return -1;
       -                                        /* efficiency: advance and re-evaluate */
       -                                        continue;
       +                                        /* expanded during renewthread; just a place holder */
       +                                        break;
                                        case END:        /* Match! */
                                                match = 1;
                                                tlp->se.m[0].e.ep = s;
       t@@ -165,19 +158,18 @@ regexec2(Reprog *progp,        /* program to run */
                int rv;
                Relist *relist0, *relist1;
        
       -        /* mark space */
       -        relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
       +        relist0 = malloc((progp->proglen+1)*sizeof(Relist));
                if(relist0 == nil)
                        return -1;
       -        relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
       +        relist1 = malloc((progp->proglen+1)*sizeof(Relist));
                if(relist1 == nil){
                        free(relist1);
                        return -1;
                }
                j->relist[0] = relist0;
                j->relist[1] = relist1;
       -        j->reliste[0] = relist0 + BIGLISTSIZE - 2;
       -        j->reliste[1] = relist1 + BIGLISTSIZE - 2;
       +        j->reliste[0] = relist0 + progp->proglen;
       +        j->reliste[1] = relist1 + progp->proglen;
        
                rv = regexec1(progp, bol, mp, ms, j);
                free(relist0);
       t@@ -218,8 +210,8 @@ regexec(Reprog *progp,        /* program to run */
                /* mark space */
                j.relist[0] = relist0;
                j.relist[1] = relist1;
       -        j.reliste[0] = relist0 + nelem(relist0) - 2;
       -        j.reliste[1] = relist1 + nelem(relist1) - 2;
       +        j.reliste[0] = relist0 + nelem(relist0) - 1;
       +        j.reliste[1] = relist1 + nelem(relist1) - 1;
        
                rv = regexec1(progp, bol, mp, ms, &j);
                if(rv >= 0)
   DIR diff --git a/src/libregexp/rregexec.c b/src/libregexp/rregexec.c
       t@@ -9,9 +9,9 @@
         */
        static int
        rregexec1(Reprog *progp,        /* program to run */
       -        Rune *bol,                /* string to run machine on */
       -        Resub *mp,                /* subexpression elements */
       -        int ms,                        /* number of elements at mp */
       +        Rune *bol,        /* string to run machine on */
       +        Resub *mp,        /* subexpression elements */
       +        int ms,                /* number of elements at mp */
                Reljunk *j)
        {
                int flag=0;
       t@@ -28,7 +28,7 @@ rregexec1(Reprog *progp,        /* program to run */
                Rune *p;
        
                match = 0;
       -        checkstart = j->startchar;
       +        checkstart = j->starttype;
                if(mp)
                        for(i=0; i<ms; i++) {
                                mp[i].s.rsp = 0;
       t@@ -46,7 +46,7 @@ rregexec1(Reprog *progp,        /* program to run */
                                switch(j->starttype) {
                                case RUNE:
                                        p = runestrchr(s, j->startchar);
       -                                if(p == 0 || p == j->reol)
       +                                if(p == 0 || (j->reol && p >= j->reol))
                                                return match;
                                        s = p;
                                        break;
       t@@ -54,7 +54,7 @@ rregexec1(Reprog *progp,        /* program to run */
                                        if(s == bol)
                                                break;
                                        p = runestrchr(s, '\n');
       -                                if(p == 0 || s == j->reol)
       +                                if(p == 0 || (j->reol && p+1 >= j->reol))
                                                return match;
                                        s = p+1;
                                        break;
       t@@ -71,15 +71,16 @@ rregexec1(Reprog *progp,        /* program to run */
                        nl->inst = 0;
        
                        /* Add first instruction to current list */
       -                _rrenewemptythread(tl, progp->startinst, ms, s);
       +                if(match == 0)
       +                        _rrenewemptythread(tl, tle, progp->startinst, ms, s);
        
                        /* Execute machine until current list is empty */
                        for(tlp=tl; tlp->inst; tlp++){
       -                        for(inst=tlp->inst; ; inst = inst->u2.next){
       +                        for(inst = tlp->inst; ; inst = inst->u2.next){
                                        switch(inst->type){
                                        case RUNE:        /* regular character */
                                                if(inst->u1.r == r)
       -                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
       +                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
                                                                return -1;
                                                break;
                                        case LBRA:
       t@@ -90,11 +91,11 @@ rregexec1(Reprog *progp,        /* program to run */
                                                continue;
                                        case ANY:
                                                if(r != '\n')
       -                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
       +                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
                                                                return -1;
                                                break;
                                        case ANYNL:
       -                                        if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
       +                                        if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
                                                                return -1;
                                                break;
                                        case BOL:
       t@@ -109,7 +110,7 @@ rregexec1(Reprog *progp,        /* program to run */
                                                ep = inst->u1.cp->end;
                                                for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
                                                        if(r >= rp[0] && r <= rp[1]){
       -                                                        if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
       +                                                        if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
                                                                        return -1;
                                                                break;
                                                        }
       t@@ -120,15 +121,12 @@ rregexec1(Reprog *progp,        /* program to run */
                                                        if(r >= rp[0] && r <= rp[1])
                                                                break;
                                                if(rp == ep)
       -                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
       +                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
                                                                return -1;
                                                break;
                                        case OR:
       -                                        /* evaluate right choice later */
       -                                        if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
       -                                                return -1;
       -                                        /* efficiency: advance and re-evaluate */
       -                                        continue;
       +                                        /* expanded during renewthread; just a place holder */
       +                                        break;
                                        case END:        /* Match! */
                                                match = 1;
                                                tlp->se.m[0].e.rep = s;
       t@@ -141,7 +139,7 @@ rregexec1(Reprog *progp,        /* program to run */
                        }
                        if(s == j->reol)
                                break;
       -                checkstart = j->startchar && nl->inst==0;
       +                checkstart = j->starttype && nl->inst==0;
                        s++;
                }while(r);
                return match;
       t@@ -155,15 +153,26 @@ rregexec2(Reprog *progp,        /* program to run */
                Reljunk *j
        )
        {
       -        Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
       +        int rv;
       +        Relist *relist0, *relist1;
        
       -        /* mark space */
       +        relist0 = malloc((progp->proglen+1)*sizeof(Relist));
       +        if(relist0 == nil)
       +                return -1;
       +        relist1 = malloc((progp->proglen+1)*sizeof(Relist));
       +        if(relist1 == nil){
       +                free(relist1);
       +                return -1;
       +        }
                j->relist[0] = relist0;
                j->relist[1] = relist1;
       -        j->reliste[0] = relist0 + nelem(relist0) - 2;
       -        j->reliste[1] = relist1 + nelem(relist1) - 2;
       +        j->reliste[0] = relist0 + progp->proglen;
       +        j->reliste[1] = relist1 + progp->proglen;
        
       -        return rregexec1(progp, bol, mp, ms, j);
       +        rv = rregexec1(progp, bol, mp, ms, j);
       +        free(relist0);
       +        free(relist1);
       +        return rv;
        }
        
        extern int
       t@@ -199,8 +208,8 @@ rregexec(Reprog *progp,        /* program to run */
                /* mark space */
                j.relist[0] = relist0;
                j.relist[1] = relist1;
       -        j.reliste[0] = relist0 + nelem(relist0) - 2;
       -        j.reliste[1] = relist1 + nelem(relist1) - 2;
       +        j.reliste[0] = relist0 + nelem(relist0) - 1;
       +        j.reliste[1] = relist1 + nelem(relist1) - 1;
        
                rv = rregexec1(progp, bol, mp, ms, &j);
                if(rv >= 0)