URI: 
       tsed.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tsed.c (27238B)
       ---
            1 /*
            2  * sed -- stream  editor
            3  *
            4  *
            5  */
            6 #include <u.h>
            7 #include <libc.h>
            8 #include <bio.h>
            9 #include <regexp.h>
           10 
           11 enum {
           12         DEPTH                = 20,                /* max nesting depth of {} */
           13         MAXCMDS                = 512,                /* max sed commands */
           14         ADDSIZE                = 10000,        /* size of add & read buffer */
           15         MAXADDS                = 20,                /* max pending adds and reads */
           16         LBSIZE                = 8192,                /* input line size */
           17         LABSIZE                = 50,                /* max label name size */
           18         MAXSUB                = 10,                /* max number of sub reg exp */
           19         MAXFILES        = 120                /* max output files */
           20 };
           21         /* An address is a line #, a R.E., "$", a reference to the last
           22          * R.E., or nothing.
           23          */
           24 typedef struct        {
           25         enum {
           26                 A_NONE,
           27                 A_DOL,
           28                 A_LINE,
           29                 A_RE,
           30                 A_LAST
           31         }type;
           32         union {
           33                 long line;                /* Line # */
           34                 Reprog *rp;                /* Compiled R.E. */
           35         } u;
           36 } Addr;
           37 
           38 typedef struct        SEDCOM {
           39         Addr        ad1;                        /* optional start address */
           40         Addr        ad2;                        /* optional end address */
           41         union {
           42                 Reprog        *re1;                /* compiled R.E. */
           43                 Rune        *text;                /* added text or file name */
           44                 struct        SEDCOM        *lb1;        /* destination command of branch */
           45         } u;
           46         Rune        *rhs;                        /* Right-hand side of substitution */
           47         Biobuf*        fcode;                        /* File ID for read and write */
           48         char        command;                /* command code -see below */
           49         char        gfl;                        /* 'Global' flag for substitutions */
           50         char        pfl;                        /* 'print' flag for substitutions */
           51         char        active;                        /* 1 => data between start and end */
           52         char        negfl;                        /* negation flag */
           53 } SedCom;
           54 
           55         /* Command Codes for field SedCom.command */
           56 #define ACOM        01
           57 #define BCOM        020
           58 #define CCOM        02
           59 #define        CDCOM        025
           60 #define        CNCOM        022
           61 #define COCOM        017
           62 #define        CPCOM        023
           63 #define DCOM        03
           64 #define ECOM        015
           65 #define EQCOM        013
           66 #define FCOM        016
           67 #define GCOM        027
           68 #define CGCOM        030
           69 #define HCOM        031
           70 #define CHCOM        032
           71 #define ICOM        04
           72 #define LCOM        05
           73 #define NCOM        012
           74 #define PCOM        010
           75 #define QCOM        011
           76 #define RCOM        06
           77 #define SCOM        07
           78 #define TCOM        021
           79 #define WCOM        014
           80 #define        CWCOM        024
           81 #define        YCOM        026
           82 #define XCOM        033
           83 
           84 
           85 typedef struct label {                        /* Label symbol table */
           86         Rune        asc[9];                        /* Label name */
           87         SedCom        *chain;
           88         SedCom        *address;                /* Command associated with label */
           89 } Label;
           90 
           91 typedef        struct        FILE_CACHE {                /* Data file control block */
           92         struct FILE_CACHE *next;        /* Forward Link */
           93         char                   *name;        /* Name of file */
           94 } FileCache;
           95 
           96 SedCom pspace[MAXCMDS];                        /* Command storage */
           97 SedCom *pend = pspace+MAXCMDS;                /* End of command storage */
           98 SedCom *rep = pspace;                        /* Current fill point */
           99 
          100 Reprog        *lastre = 0;                        /* Last regular expression */
          101 Resub        subexp[MAXSUB];                        /* sub-patterns of pattern match*/
          102 
          103 Rune        addspace[ADDSIZE];                /* Buffer for a, c, & i commands */
          104 Rune        *addend = addspace+ADDSIZE;
          105 
          106 SedCom        *abuf[MAXADDS];                        /* Queue of pending adds & reads */
          107 SedCom        **aptr = abuf;
          108 
          109 struct {                                /* Sed program input control block */
          110         enum PTYPE                         /* Either on command line or in file */
          111                 { P_ARG,
          112                   P_FILE
          113                 } type;
          114         union PCTL {                        /* Pointer to data */
          115                 Biobuf        *bp;
          116                 char        *curr;
          117         } pctl;
          118 } prog;
          119 
          120 Rune        genbuf[LBSIZE];                        /* Miscellaneous buffer */
          121 
          122 FileCache        *fhead = 0;                /* Head of File Cache Chain */
          123 FileCache        *ftail = 0;                /* Tail of File Cache Chain */
          124 
          125 Rune        *loc1;                                /* Start of pattern match */
          126 Rune        *loc2;                                /* End of pattern match */
          127 Rune        seof;                                /* Pattern delimiter char */
          128 
          129 Rune        linebuf[LBSIZE+1];                /* Input data buffer */
          130 Rune        *lbend = linebuf+LBSIZE;        /* End of buffer */
          131 Rune        *spend = linebuf;                        /* End of input data */
          132 Rune        *cp;                                /* Current scan point in linebuf */
          133 
          134 Rune        holdsp[LBSIZE+1];                /* Hold buffer */
          135 Rune        *hend = holdsp+LBSIZE;                /* End of hold buffer */
          136 Rune        *hspend = holdsp;                /* End of hold data */
          137 
          138 int        nflag;                                /* Command line flags */
          139 int        gflag;
          140 int        lflag;
          141 
          142 int        dolflag;                        /* Set when at true EOF */
          143 int        sflag;                                /* Set when substitution done */
          144 int        jflag;                                /* Set when jump required */
          145 int        delflag;                        /* Delete current line when set */
          146 
          147 long        lnum = 0;                        /* Input line count */
          148 
          149 char        fname[MAXFILES][40];                /* File name cache */
          150 Biobuf        *fcode[MAXFILES];                /* File ID cache */
          151 int        nfiles = 0;                        /* Cache fill point */
          152 
          153 Biobuf        fout;                                /* Output stream */
          154 Biobuf        bstdin;                                /* Default input */
          155 Biobuf*        f = 0;                                /* Input data */
          156 
          157 Label        ltab[LABSIZE];                        /* Label name symbol table */
          158 Label        *labend = ltab+LABSIZE;                /* End of label table */
          159 Label        *lab = ltab+1;                        /* Current Fill point */
          160 
          161 int        depth = 0;                        /* {} stack pointer */
          162 
          163 Rune        bad;                                /* Dummy err ptr reference */
          164 Rune        *badp = &bad;
          165 
          166 
          167 char        CGMES[]         =         "Command garbled: %S";
          168 char        TMMES[]         =         "Too much text: %S";
          169 char        LTL[]         =         "Label too long: %S";
          170 char        AD0MES[] =        "No addresses allowed: %S";
          171 char        AD1MES[] =        "Only one address allowed: %S";
          172 
          173 void        address(Addr *);
          174 void        arout(void);
          175 int        cmp(char *, char *);
          176 int        rcmp(Rune *, Rune *);
          177 void        command(SedCom *);
          178 Reprog        *compile(void);
          179 Rune        *compsub(Rune *, Rune *);
          180 void        dechain(void);
          181 void        dosub(Rune *);
          182 int        ecmp(Rune *, Rune *, int);
          183 void        enroll(char *);
          184 void        errexit(void);
          185 int        executable(SedCom *);
          186 void        execute(void);
          187 void        fcomp(void);
          188 long        getrune(void);
          189 Rune        *gline(Rune *);
          190 int        match(Reprog *, Rune *);
          191 void        newfile(enum PTYPE, char *);
          192 int         opendata(void);
          193 Biobuf        *open_file(char *);
          194 Rune        *place(Rune *, Rune *, Rune *);
          195 void        quit(char *, char *);
          196 int        rline(Rune *, Rune *);
          197 Label        *search(Label *);
          198 int        substitute(SedCom *);
          199 char        *text(char *);
          200 Rune        *stext(Rune *, Rune *);
          201 int        ycomp(SedCom *);
          202 char *        trans(int c);
          203 void        putline(Biobuf *bp, Rune *buf, int n);
          204 
          205 void
          206 main(int argc, char **argv)
          207 {
          208         int        compfl;
          209 
          210         lnum = 0;
          211         Binit(&fout, 1, OWRITE);
          212         fcode[nfiles++] = &fout;
          213         compfl = 0;
          214 
          215         if(argc == 1)
          216                 exits(0);
          217         ARGBEGIN{
          218                 case 'n':
          219                         nflag++;
          220                         continue;
          221                 case 'f':
          222                         if(argc <= 1)
          223                                 quit("no pattern-file", 0);
          224                         newfile(P_FILE, ARGF());
          225                         fcomp();
          226                         compfl = 1;
          227                         continue;
          228                 case 'e':
          229                         if (argc <= 1)
          230                                 quit("missing pattern", 0);
          231                         newfile(P_ARG, ARGF());
          232                         fcomp();
          233                         compfl = 1;
          234                         continue;
          235                 case 'g':
          236                         gflag++;
          237                         continue;
          238                 case 'l':
          239                         lflag++;
          240                         continue;
          241                 default:
          242                         fprint(2, "sed: Unknown flag: %c\n", ARGC());
          243                         continue;
          244         } ARGEND
          245 
          246         if(compfl == 0) {
          247                 if (--argc < 0)
          248                         quit("missing pattern", 0);
          249                 newfile(P_ARG, *argv++);
          250                 fcomp();
          251         }
          252 
          253         if(depth)
          254                 quit("Too many {'s", 0);
          255 
          256         ltab[0].address = rep;
          257 
          258         dechain();
          259 
          260         if(argc <= 0)
          261                 enroll(0);                /* Add stdin to cache */
          262         else while(--argc >= 0) {
          263                 enroll(*argv++);
          264         }
          265         execute();
          266         exits(0);
          267 }
          268 void
          269 fcomp(void)
          270 {
          271         Rune        *tp;
          272         SedCom        *pt, *pt1;
          273         int        i;
          274         Label        *lpt;
          275 
          276         static Rune        *p = addspace;
          277         static SedCom        **cmpend[DEPTH];        /* stack of {} operations */
          278 
          279         while (rline(linebuf, lbend) >= 0) {
          280                 cp = linebuf;
          281 comploop:
          282                 while(*cp == ' ' || *cp == '\t')
          283                         cp++;
          284                 if(*cp == '\0' || *cp == '#')
          285                         continue;
          286                 if(*cp == ';') {
          287                         cp++;
          288                         goto comploop;
          289                 }
          290 
          291                 address(&rep->ad1);
          292                 if (rep->ad1.type != A_NONE) {
          293                         if (rep->ad1.type == A_LAST) {
          294                                 if (!lastre)
          295                                         quit("First RE may not be null", 0);
          296                                 rep->ad1.type = A_RE;
          297                                 rep->ad1.u.rp = lastre;
          298                         }
          299                         if(*cp == ',' || *cp == ';') {
          300                                 cp++;
          301                                 address(&rep->ad2);
          302                                 if (rep->ad2.type == A_LAST) {
          303                                         rep->ad1.type = A_RE;
          304                                         rep->ad2.u.rp = lastre;
          305                                 }
          306                         } else
          307                                 rep->ad2.type = A_NONE;
          308                 }
          309                 while(*cp == ' ' || *cp == '\t')
          310                         cp++;
          311 
          312 swit:
          313                 switch(*cp++) {
          314 
          315                         default:
          316                                 quit("Unrecognized command: %S", (char *)linebuf);
          317 
          318                         case '!':
          319                                 rep->negfl = 1;
          320                                 goto swit;
          321 
          322                         case '{':
          323                                 rep->command = BCOM;
          324                                 rep->negfl = !(rep->negfl);
          325                                 cmpend[depth++] = &rep->u.lb1;
          326                                 if(++rep >= pend)
          327                                         quit("Too many commands: %S", (char *) linebuf);
          328                                 if(*cp == '\0')        continue;
          329                                 goto comploop;
          330 
          331                         case '}':
          332                                 if(rep->ad1.type != A_NONE)
          333                                         quit(AD0MES, (char *) linebuf);
          334                                 if(--depth < 0)
          335                                         quit("Too many }'s", 0);
          336                                 *cmpend[depth] = rep;
          337                                 if(*cp == 0)        continue;
          338                                 goto comploop;
          339 
          340                         case '=':
          341                                 rep->command = EQCOM;
          342                                 if(rep->ad2.type != A_NONE)
          343                                         quit(AD1MES, (char *) linebuf);
          344                                 break;
          345 
          346                         case ':':
          347                                 if(rep->ad1.type != A_NONE)
          348                                         quit(AD0MES, (char *) linebuf);
          349 
          350                                 while(*cp == ' ')
          351                                         cp++;
          352                                 tp = lab->asc;
          353                                 while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
          354                                         *tp++ = *cp++;
          355                                         if(tp >= &(lab->asc[8]))
          356                                                 quit(LTL, (char *) linebuf);
          357                                 }
          358                                 *tp = '\0';
          359 
          360                                 if(lpt = search(lab)) {
          361                                         if(lpt->address)
          362                                                 quit("Duplicate labels: %S", (char *) linebuf);
          363                                 } else {
          364                                         lab->chain = 0;
          365                                         lpt = lab;
          366                                         if(++lab >= labend)
          367                                                 quit("Too many labels: %S", (char *) linebuf);
          368                                 }
          369                                 lpt->address = rep;
          370                                 if (*cp == '#')
          371                                         continue;
          372                                 rep--;                        /* reuse this slot */
          373                                 break;
          374 
          375                         case 'a':
          376                                 rep->command = ACOM;
          377                                 if(rep->ad2.type != A_NONE)
          378                                         quit(AD1MES, (char *) linebuf);
          379                                 if(*cp == '\\')        cp++;
          380                                 if(*cp++ != '\n')
          381                                         quit(CGMES, (char *) linebuf);
          382                                 rep->u.text = p;
          383                                 p = stext(p, addend);
          384                                 break;
          385                         case 'c':
          386                                 rep->command = CCOM;
          387                                 if(*cp == '\\')        cp++;
          388                                 if(*cp++ != '\n')
          389                                         quit(CGMES, (char *) linebuf);
          390                                 rep->u.text = p;
          391                                 p = stext(p, addend);
          392                                 break;
          393                         case 'i':
          394                                 rep->command = ICOM;
          395                                 if(rep->ad2.type != A_NONE)
          396                                         quit(AD1MES, (char *) linebuf);
          397                                 if(*cp == '\\')        cp++;
          398                                 if(*cp++ != '\n')
          399                                         quit(CGMES, (char *) linebuf);
          400                                 rep->u.text = p;
          401                                 p = stext(p, addend);
          402                                 break;
          403 
          404                         case 'g':
          405                                 rep->command = GCOM;
          406                                 break;
          407 
          408                         case 'G':
          409                                 rep->command = CGCOM;
          410                                 break;
          411 
          412                         case 'h':
          413                                 rep->command = HCOM;
          414                                 break;
          415 
          416                         case 'H':
          417                                 rep->command = CHCOM;
          418                                 break;
          419 
          420                         case 't':
          421                                 rep->command = TCOM;
          422                                 goto jtcommon;
          423 
          424                         case 'b':
          425                                 rep->command = BCOM;
          426 jtcommon:
          427                                 while(*cp == ' ')cp++;
          428                                 if(*cp == '\0') {
          429                                         if(pt = ltab[0].chain) {
          430                                                 while(pt1 = pt->u.lb1)
          431                                                         pt = pt1;
          432                                                 pt->u.lb1 = rep;
          433                                         } else
          434                                                 ltab[0].chain = rep;
          435                                         break;
          436                                 }
          437                                 tp = lab->asc;
          438                                 while((*tp++ = *cp++))
          439                                         if(tp >= &(lab->asc[8]))
          440                                                 quit(LTL, (char *) linebuf);
          441                                 cp--;
          442                                 tp[-1] = '\0';
          443 
          444                                 if(lpt = search(lab)) {
          445                                         if(lpt->address) {
          446                                                 rep->u.lb1 = lpt->address;
          447                                         } else {
          448                                                 pt = lpt->chain;
          449                                                 while(pt1 = pt->u.lb1)
          450                                                         pt = pt1;
          451                                                 pt->u.lb1 = rep;
          452                                         }
          453                                 } else {
          454                                         lab->chain = rep;
          455                                         lab->address = 0;
          456                                         if(++lab >= labend)
          457                                                 quit("Too many labels: %S",
          458                                                         (char *) linebuf);
          459                                 }
          460                                 break;
          461 
          462                         case 'n':
          463                                 rep->command = NCOM;
          464                                 break;
          465 
          466                         case 'N':
          467                                 rep->command = CNCOM;
          468                                 break;
          469 
          470                         case 'p':
          471                                 rep->command = PCOM;
          472                                 break;
          473 
          474                         case 'P':
          475                                 rep->command = CPCOM;
          476                                 break;
          477 
          478                         case 'r':
          479                                 rep->command = RCOM;
          480                                 if(rep->ad2.type != A_NONE)
          481                                         quit(AD1MES, (char *) linebuf);
          482                                 if(*cp++ != ' ')
          483                                         quit(CGMES, (char *) linebuf);
          484                                 rep->u.text = p;
          485                                 p = stext(p, addend);
          486                                 break;
          487 
          488                         case 'd':
          489                                 rep->command = DCOM;
          490                                 break;
          491 
          492                         case 'D':
          493                                 rep->command = CDCOM;
          494                                 rep->u.lb1 = pspace;
          495                                 break;
          496 
          497                         case 'q':
          498                                 rep->command = QCOM;
          499                                 if(rep->ad2.type != A_NONE)
          500                                         quit(AD1MES, (char *) linebuf);
          501                                 break;
          502 
          503                         case 'l':
          504                                 rep->command = LCOM;
          505                                 break;
          506 
          507                         case 's':
          508                                 rep->command = SCOM;
          509                                 seof = *cp++;
          510                                 if ((rep->u.re1 = compile()) == 0) {
          511                                         if(!lastre)
          512                                                 quit("First RE may not be null.", 0);
          513                                         rep->u.re1 = lastre;
          514                                 }
          515                                 rep->rhs = p;
          516                                 if((p = compsub(p, addend)) == 0)
          517                                         quit(CGMES, (char *) linebuf);
          518                                 if(*cp == 'g') {
          519                                         cp++;
          520                                         rep->gfl++;
          521                                 } else if(gflag)
          522                                         rep->gfl++;
          523 
          524                                 if(*cp == 'p') {
          525                                         cp++;
          526                                         rep->pfl = 1;
          527                                 }
          528 
          529                                 if(*cp == 'P') {
          530                                         cp++;
          531                                         rep->pfl = 2;
          532                                 }
          533 
          534                                 if(*cp == 'w') {
          535                                         cp++;
          536                                         if(*cp++ !=  ' ')
          537                                                 quit(CGMES, (char *) linebuf);
          538                                         text(fname[nfiles]);
          539                                         for(i = nfiles - 1; i >= 0; i--)
          540                                                 if(cmp(fname[nfiles],fname[i]) == 0) {
          541                                                         rep->fcode = fcode[i];
          542                                                         goto done;
          543                                                 }
          544                                         if(nfiles >= MAXFILES)
          545                                                 quit("Too many files in w commands 1", 0);
          546                                         rep->fcode = open_file(fname[nfiles]);
          547                                 }
          548                                 break;
          549 
          550                         case 'w':
          551                                 rep->command = WCOM;
          552                                 if(*cp++ != ' ')
          553                                         quit(CGMES, (char *) linebuf);
          554                                 text(fname[nfiles]);
          555                                 for(i = nfiles - 1; i >= 0; i--)
          556                                         if(cmp(fname[nfiles], fname[i]) == 0) {
          557                                                 rep->fcode = fcode[i];
          558                                                 goto done;
          559                                         }
          560                                 if(nfiles >= MAXFILES){
          561                                         fprint(2, "sed: Too many files in w commands 2 \n");
          562                                         fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
          563                                         errexit();
          564                                 }
          565                                 rep->fcode = open_file(fname[nfiles]);
          566                                 break;
          567 
          568                         case 'x':
          569                                 rep->command = XCOM;
          570                                 break;
          571 
          572                         case 'y':
          573                                 rep->command = YCOM;
          574                                 seof = *cp++;
          575                                 if (ycomp(rep) == 0)
          576                                         quit(CGMES, (char *) linebuf);
          577                                 break;
          578 
          579                 }
          580 done:
          581                 if(++rep >= pend)
          582                         quit("Too many commands, last: %S", (char *) linebuf);
          583 
          584                 if(*cp++ != '\0') {
          585                         if(cp[-1] == ';')
          586                                 goto comploop;
          587                         quit(CGMES, (char *) linebuf);
          588                 }
          589 
          590         }
          591 }
          592 
          593 Biobuf *
          594 open_file(char *name)
          595 {
          596         Biobuf *bp;
          597         int fd;
          598 
          599         if ((bp = malloc(sizeof(Biobuf))) == 0)
          600                 quit("Out of memory", 0);
          601         if ((fd = open(name, OWRITE)) < 0 &&
          602                 (fd = create(name, OWRITE, 0666)) < 0)
          603                         quit("Cannot create %s", name);
          604         Binit(bp, fd, OWRITE);
          605         Bseek(bp, 0, 2);
          606         fcode[nfiles++] = bp;
          607         return bp;
          608 }
          609 
          610 Rune        *
          611 compsub(Rune *rhs, Rune *end)
          612 {
          613         Rune        r;
          614 
          615         while ((r = *cp++) != '\0') {
          616                 if(r == '\\') {
          617                         if (rhs < end)
          618                                 *rhs++ = Runemax;
          619                         else
          620                                 return 0;
          621                         r = *cp++;
          622                         if(r == 'n')
          623                                 r = '\n';
          624                 } else {
          625                         if(r == seof) {
          626                                 if (rhs < end)
          627                                         *rhs++ = '\0';
          628                                 else
          629                                         return 0;
          630                                 return rhs;
          631                         }
          632                 }
          633                 if (rhs < end)
          634                         *rhs++ = r;
          635                 else
          636                         return 0;
          637 
          638         }
          639         return 0;
          640 }
          641 
          642 Reprog *
          643 compile(void)
          644 {
          645         Rune c;
          646         char *ep;
          647         char expbuf[512];
          648 
          649         if((c = *cp++) == seof)                /* '//' */
          650                 return 0;
          651         ep = expbuf;
          652         do {
          653                 if (c == 0 || c == '\n')
          654                         quit(TMMES, (char *) linebuf);
          655                 if (c == '\\') {
          656                         if (ep >= expbuf+sizeof(expbuf))
          657                                 quit(TMMES, (char *) linebuf);
          658                         ep += runetochar(ep, &c);
          659                         if ((c = *cp++) == 'n')
          660                                 c = '\n';
          661                 }
          662                 if (ep >= expbuf+sizeof(expbuf))
          663                         quit(TMMES, (char *) linebuf);
          664                 ep += runetochar(ep, &c);
          665         } while ((c = *cp++) != seof);
          666         *ep = 0;
          667         return lastre = regcomp(expbuf);
          668 }
          669 
          670 void
          671 regerror(char *s)
          672 {
          673         USED(s);
          674         quit(CGMES, (char *) linebuf);
          675 }
          676 
          677 void
          678 newfile(enum PTYPE type, char *name)
          679 {
          680         if (type == P_ARG)
          681                 prog.pctl.curr = name;
          682         else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0)
          683                 quit("Cannot open pattern-file: %s\n", name);
          684         prog.type = type;
          685 }
          686 
          687 int
          688 rline(Rune *buf, Rune *end)
          689 {
          690         long c;
          691         Rune r;
          692 
          693         while ((c = getrune()) >= 0) {
          694                 r = c;
          695                 if (r == '\\') {
          696                         if (buf <= end)
          697                                 *buf++ = r;
          698                         if ((c = getrune()) < 0)
          699                                 break;
          700                         r = c;
          701                 } else if (r == '\n') {
          702                         *buf = '\0';
          703                         return(1);
          704                 }
          705                 if (buf <= end)
          706                         *buf++ = r;
          707         }
          708         *buf = '\0';
          709         return(-1);
          710 }
          711 
          712 long
          713 getrune(void)
          714 {
          715         char *p;
          716         long c;
          717         Rune r;
          718 
          719         if (prog.type == P_ARG) {
          720                 if ((p = prog.pctl.curr) != 0) {
          721                         if (*p) {
          722                                 prog.pctl.curr += chartorune(&r, p);
          723                                 c = r;
          724                         } else {
          725                                 c = '\n';        /* fake an end-of-line */
          726                                 prog.pctl.curr = 0;
          727                         }
          728                 } else
          729                         c = -1;
          730         } else if ((c = Bgetrune(prog.pctl.bp)) < 0)
          731                         Bterm(prog.pctl.bp);
          732         return c;
          733 }
          734 
          735 void
          736 address(Addr *ap)
          737 {
          738         int c;
          739         long        lno;
          740 
          741         if((c = *cp++) == '$')
          742                 ap->type = A_DOL;
          743         else if(c == '/') {
          744                 seof = c;
          745                 if (ap->u.rp = compile())
          746                         ap->type = A_RE;
          747                 else
          748                         ap->type = A_LAST;
          749         }
          750         else if (c >= '0' && c <= '9') {
          751                 lno = c-'0';
          752                 while ((c = *cp) >= '0' && c <= '9')
          753                         lno = lno*10 + *cp++-'0';
          754                 if(!lno)
          755                         quit("line number 0 is illegal",0);
          756                 ap->type = A_LINE;
          757                 ap->u.line = lno;
          758         }
          759         else {
          760                 cp--;
          761                 ap->type = A_NONE;
          762         }
          763 }
          764 
          765 int
          766 cmp(char *a, char *b)                /* compare characters */
          767 {
          768         while(*a == *b++)
          769                 if (*a == '\0')
          770                         return(0);
          771                 else a++;
          772         return(1);
          773 }
          774 
          775 int
          776 rcmp(Rune *a, Rune *b)                /* compare runes */
          777 {
          778         while(*a == *b++)
          779                 if (*a == '\0')
          780                         return(0);
          781                 else a++;
          782         return(1);
          783 }
          784 
          785 char *
          786 text(char *p)                /* extract character string */
          787 {
          788         Rune        r;
          789 
          790         while(*cp == '\t' || *cp == ' ')
          791                         cp++;
          792         while (*cp) {
          793                 if ((r = *cp++) == '\\')
          794                         if ((r = *cp++) == 0)
          795                                 break;;
          796                 if (r == '\n')
          797                         while (*cp == '\t' || *cp == ' ')
          798                                         cp++;
          799                 p += runetochar(p, &r);
          800         }
          801         *p++ = '\0';
          802         return p;
          803 }
          804 
          805 Rune *
          806 stext(Rune *p, Rune *end)                /* extract rune string */
          807 {
          808         while(*cp == '\t' || *cp == ' ')
          809                 cp++;
          810         while (*cp) {
          811                 if (*cp == '\\')
          812                         if (*++cp == 0)
          813                                 break;
          814                 if (p >= end-1)
          815                         quit(TMMES, (char *) linebuf);
          816                 if ((*p++ = *cp++) == '\n')
          817                         while(*cp == '\t' || *cp == ' ')
          818                                         cp++;
          819         }
          820         *p++ = 0;
          821         return p;
          822 }
          823 
          824 
          825 Label *
          826 search (Label *ptr)
          827 {
          828         Label        *rp;
          829 
          830         for (rp = ltab; rp < ptr; rp++)
          831                 if(rcmp(rp->asc, ptr->asc) == 0)
          832                         return(rp);
          833         return(0);
          834 }
          835 
          836 void
          837 dechain(void)
          838 {
          839         Label        *lptr;
          840         SedCom        *rptr, *trptr;
          841 
          842         for(lptr = ltab; lptr < lab; lptr++) {
          843 
          844                 if(lptr->address == 0)
          845                         quit("Undefined label: %S", (char *) lptr->asc);
          846 
          847                 if(lptr->chain) {
          848                         rptr = lptr->chain;
          849                         while(trptr = rptr->u.lb1) {
          850                                 rptr->u.lb1 = lptr->address;
          851                                 rptr = trptr;
          852                         }
          853                         rptr->u.lb1 = lptr->address;
          854                 }
          855         }
          856 }
          857 
          858 int
          859 ycomp(SedCom *r)
          860 {
          861         int         i;
          862         Rune        *rp;
          863         Rune        c, *tsp, highc;
          864         Rune        *sp;
          865 
          866         highc = 0;
          867         for(tsp = cp; *tsp != seof; tsp++) {
          868                 if(*tsp == '\\')
          869                         tsp++;
          870                 if(*tsp == '\n' || *tsp == '\0')
          871                         return(0);
          872                 if (*tsp > highc) highc = *tsp;
          873         }
          874         tsp++;
          875         if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
          876                 quit("Out of memory", 0);
          877         *rp++ = highc;                                /* save upper bound */
          878         for (i = 0; i <= highc; i++)
          879                 rp[i] = i;
          880         sp = cp;
          881         while((c = *sp++) != seof) {
          882                 if(c == '\\' && *sp == 'n') {
          883                         sp++;
          884                         c = '\n';
          885                 }
          886                 if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
          887                         rp[c] = '\n';
          888                         tsp++;
          889                 }
          890                 if(rp[c] == seof || rp[c] == '\0') {
          891                         free(r->u.re1);
          892                         r->u.re1 = 0;
          893                         return(0);
          894                 }
          895         }
          896         if(*tsp != seof) {
          897                 free(r->u.re1);
          898                 r->u.re1 = 0;
          899                 return(0);
          900         }
          901         cp = tsp+1;
          902         return(1);
          903 }
          904 
          905 void
          906 execute(void)
          907 {
          908         SedCom        *ipc;
          909 
          910         while (spend = gline(linebuf)){
          911                 for(ipc = pspace; ipc->command; ) {
          912                         if (!executable(ipc)) {
          913                                 ipc++;
          914                                 continue;
          915                         }
          916                         command(ipc);
          917 
          918                         if(delflag)
          919                                 break;
          920                         if(jflag) {
          921                                 jflag = 0;
          922                                 if((ipc = ipc->u.lb1) == 0)
          923                                         break;
          924                         } else
          925                                 ipc++;
          926 
          927                 }
          928                 if(!nflag && !delflag)
          929                         putline(&fout, linebuf, spend-linebuf);
          930                 if(aptr > abuf) {
          931                         arout();
          932                 }
          933                 delflag = 0;
          934         }
          935 }
          936         /* determine if a statement should be applied to an input line */
          937 int
          938 executable(SedCom *ipc)
          939 {
          940         if (ipc->active) {        /* Addr1 satisfied - accept until Addr2 */
          941                 if (ipc->active == 1)                /* Second line */
          942                         ipc->active = 2;
          943                 switch(ipc->ad2.type) {
          944                         case A_NONE:        /* No second addr; use first */
          945                                 ipc->active = 0;
          946                                 break;
          947                         case A_DOL:        /* Accept everything */
          948                                 return !ipc->negfl;
          949                         case A_LINE:        /* Line at end of range? */
          950                                 if (lnum <= ipc->ad2.u.line) {
          951                                         if (ipc->ad2.u.line == lnum)
          952                                                 ipc->active = 0;
          953                                         return !ipc->negfl;
          954                                 }
          955                                 ipc->active = 0;        /* out of range */
          956                                 return ipc->negfl;
          957                         case A_RE:        /* Check for matching R.E. */
          958                                 if (match(ipc->ad2.u.rp, linebuf))
          959                                         ipc->active = 0;
          960                                 return !ipc->negfl;
          961                         default:                /* internal error */
          962                                 quit("Internal error", 0);
          963                 }
          964         }
          965         switch (ipc->ad1.type) {        /* Check first address */
          966                 case A_NONE:                        /* Everything matches */
          967                         return !ipc->negfl;
          968                 case A_DOL:                        /* Only last line */
          969                         if (dolflag)
          970                                 return !ipc->negfl;
          971                         break;
          972                 case A_LINE:                        /* Check line number */
          973                         if (ipc->ad1.u.line == lnum) {
          974                                 ipc->active = 1;        /* In range */
          975                                 return !ipc->negfl;
          976                         }
          977                         break;
          978                 case A_RE:                        /* Check R.E. */
          979                         if (match(ipc->ad1.u.rp, linebuf)) {
          980                                 ipc->active = 1;        /* In range */
          981                                 return !ipc->negfl;
          982                         }
          983                         break;
          984                 default:
          985                         quit("Internal error", 0);
          986         }
          987         return ipc->negfl;
          988 }
          989 
          990 int
          991 match(Reprog *pattern, Rune *buf)
          992 {
          993         if (!pattern)
          994                 return 0;
          995         subexp[0].s.rsp = buf;
          996         subexp[0].e.rep = 0;
          997         if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
          998                 loc1 = subexp[0].s.rsp;
          999                 loc2 = subexp[0].e.rep;
         1000                 return 1;
         1001         }
         1002         loc1 = loc2 = 0;
         1003         return 0;
         1004 }
         1005 
         1006 int
         1007 substitute(SedCom *ipc)
         1008 {
         1009         int len;
         1010 
         1011         if(!match(ipc->u.re1, linebuf))
         1012                 return 0;
         1013 
         1014         /*
         1015          * we have at least one match.  some patterns, e.g. '$' or '^', can
         1016          * produce zero-length matches, so during a global substitute we
         1017          * must bump to the character after a zero-length match to keep from looping.
         1018          */
         1019         sflag = 1;
         1020         if(ipc->gfl == 0)                /* single substitution */
         1021                 dosub(ipc->rhs);
         1022         else
         1023         do{                                /* global substitution */
         1024                 len = loc2-loc1;        /* length of match */
         1025                 dosub(ipc->rhs);        /* dosub moves loc2 */
         1026                 if(*loc2 == 0)                /* end of string */
         1027                         break;
         1028                 if(len == 0)                /* zero-length R.E. match */
         1029                         loc2++;                /* bump over zero-length match */
         1030                 if(*loc2 == 0)                /* end of string */
         1031                         break;
         1032         } while(match(ipc->u.re1, loc2));
         1033         return 1;
         1034 }
         1035 
         1036 void
         1037 dosub(Rune *rhsbuf)
         1038 {
         1039         Rune *lp, *sp;
         1040         Rune *rp;
         1041         int c, n;
         1042 
         1043         lp = linebuf;
         1044         sp = genbuf;
         1045         rp = rhsbuf;
         1046         while (lp < loc1)
         1047                 *sp++ = *lp++;
         1048         while(c = *rp++) {
         1049                 if (c == '&') {
         1050                         sp = place(sp, loc1, loc2);
         1051                         continue;
         1052                 }
         1053                 if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB+'0') {
         1054                         n = c-'0';
         1055                         if (subexp[n].s.rsp && subexp[n].e.rep) {
         1056                                 sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
         1057                                 continue;
         1058                         }
         1059                         else {
         1060                                 fprint(2, "sed: Invalid back reference \\%d\n",n);
         1061                                 errexit();
         1062                         }
         1063                 }
         1064                 *sp++ = c;
         1065                 if (sp >= &genbuf[LBSIZE])
         1066                         fprint(2, "sed: Output line too long.\n");
         1067         }
         1068         lp = loc2;
         1069         loc2 = sp - genbuf + linebuf;
         1070         while (*sp++ = *lp++)
         1071                 if (sp >= &genbuf[LBSIZE])
         1072                         fprint(2, "sed: Output line too long.\n");
         1073         lp = linebuf;
         1074         sp = genbuf;
         1075         while (*lp++ = *sp++)
         1076                 ;
         1077         spend = lp-1;
         1078 }
         1079 
         1080 Rune *
         1081 place(Rune *sp, Rune *l1, Rune *l2)
         1082 {
         1083         while (l1 < l2) {
         1084                 *sp++ = *l1++;
         1085                 if (sp >= &genbuf[LBSIZE])
         1086                         fprint(2, "sed: Output line too long.\n");
         1087         }
         1088         return(sp);
         1089 }
         1090 
         1091 char *
         1092 trans(int c)
         1093 {
         1094         static char buf[] = "\\x0000";
         1095         static char hex[] = "0123456789abcdef";
         1096 
         1097         switch(c) {
         1098                 case '\b':
         1099                         return "\\b";
         1100                 case '\n':
         1101                         return "\\n";
         1102                 case '\r':
         1103                         return "\\r";
         1104                 case '\t':
         1105                         return "\\t";
         1106                 case '\\':
         1107                         return "\\\\";
         1108         }
         1109         buf[2] = hex[(c>>12)&0xF];
         1110         buf[3] = hex[(c>>8)&0xF];
         1111         buf[4] = hex[(c>>4)&0xF];
         1112         buf[5] = hex[c&0xF];
         1113         return buf;
         1114 }
         1115 
         1116 void
         1117 command(SedCom *ipc)
         1118 {
         1119         int        i, c;
         1120         Rune        *p1, *p2;
         1121         char        *ucp;
         1122         Rune        *rp;
         1123         Rune        *execp;
         1124 
         1125         switch(ipc->command) {
         1126 
         1127                 case ACOM:
         1128                         *aptr++ = ipc;
         1129                         if(aptr >= abuf+MAXADDS) {
         1130                                 quit("sed: Too many appends after line %ld\n",
         1131                                         (char *) lnum);
         1132                         }
         1133                         *aptr = 0;
         1134                         break;
         1135                 case CCOM:
         1136                         delflag = 1;
         1137                         if(ipc->active == 1) {
         1138                                 for(rp = ipc->u.text; *rp; rp++)
         1139                                         Bputrune(&fout, *rp);
         1140                                 Bputc(&fout, '\n');
         1141                         }
         1142                         break;
         1143                 case DCOM:
         1144                         delflag++;
         1145                         break;
         1146                 case CDCOM:
         1147                         p1 = p2 = linebuf;
         1148                         while(*p1 != '\n') {
         1149                                 if(*p1++ == 0) {
         1150                                         delflag++;
         1151                                         return;
         1152                                 }
         1153                         }
         1154                         p1++;
         1155                         while(*p2++ = *p1++)
         1156                                 ;
         1157                         spend = p2-1;
         1158                         jflag++;
         1159                         break;
         1160                 case EQCOM:
         1161                         Bprint(&fout, "%ld\n", lnum);
         1162                         break;
         1163                 case GCOM:
         1164                         p1 = linebuf;
         1165                         p2 = holdsp;
         1166                         while(*p1++ = *p2++)
         1167                                 ;
         1168                         spend = p1-1;
         1169                         break;
         1170                 case CGCOM:
         1171                         *spend++ = '\n';
         1172                         p1 = spend;
         1173                         p2 = holdsp;
         1174                         while(*p1++ = *p2++)
         1175                                 if(p1 >= lbend)
         1176                                         break;
         1177                         spend = p1-1;
         1178                         break;
         1179                 case HCOM:
         1180                         p1 = holdsp;
         1181                         p2 = linebuf;
         1182                         while(*p1++ = *p2++);
         1183                         hspend = p1-1;
         1184                         break;
         1185                 case CHCOM:
         1186                         *hspend++ = '\n';
         1187                         p1 = hspend;
         1188                         p2 = linebuf;
         1189                         while(*p1++ = *p2++)
         1190                                 if(p1 >= hend)
         1191                                         break;
         1192                         hspend = p1-1;
         1193                         break;
         1194                 case ICOM:
         1195                         for(rp = ipc->u.text; *rp; rp++)
         1196                                 Bputrune(&fout, *rp);
         1197                         Bputc(&fout, '\n');
         1198                         break;
         1199                 case BCOM:
         1200                         jflag = 1;
         1201                         break;
         1202                 case LCOM:
         1203                         c = 0;
         1204                         for (i = 0, rp = linebuf; *rp; rp++) {
         1205                                 c = *rp;
         1206                                 if(c >= 0x20 && c < 0x7F && c != '\\') {
         1207                                         Bputc(&fout, c);
         1208                                         if(i++ > 71) {
         1209                                                 Bprint(&fout, "\\\n");
         1210                                                 i = 0;
         1211                                         }
         1212                                 } else {
         1213                                         for (ucp = trans(*rp); *ucp; ucp++){
         1214                                                 c = *ucp;
         1215                                                 Bputc(&fout, c);
         1216                                                 if(i++ > 71) {
         1217                                                         Bprint(&fout, "\\\n");
         1218                                                         i = 0;
         1219                                                 }
         1220                                         }
         1221                                 }
         1222                         }
         1223                         if(c == ' ')
         1224                                 Bprint(&fout, "\\n");
         1225                         Bputc(&fout, '\n');
         1226                         break;
         1227                 case NCOM:
         1228                         if(!nflag)
         1229                                 putline(&fout, linebuf, spend-linebuf);
         1230 
         1231                         if(aptr > abuf)
         1232                                 arout();
         1233                         if((execp = gline(linebuf)) == 0) {
         1234                                 delflag = 1;
         1235                                 break;
         1236                         }
         1237                         spend = execp;
         1238                         break;
         1239                 case CNCOM:
         1240                         if(aptr > abuf)
         1241                                 arout();
         1242                         *spend++ = '\n';
         1243                         if((execp = gline(spend)) == 0) {
         1244                                 delflag = 1;
         1245                                 break;
         1246                         }
         1247                         spend = execp;
         1248                         break;
         1249                 case PCOM:
         1250                         putline(&fout, linebuf, spend-linebuf);
         1251                         break;
         1252                 case CPCOM:
         1253         cpcom:
         1254                         for(rp = linebuf; *rp && *rp != '\n'; rp++)
         1255                                 Bputc(&fout, *rp);
         1256                         Bputc(&fout, '\n');
         1257                         break;
         1258                 case QCOM:
         1259                         if(!nflag)
         1260                                 putline(&fout, linebuf, spend-linebuf);
         1261                         if(aptr > abuf)
         1262                                 arout();
         1263                         exits(0);
         1264                 case RCOM:
         1265                         *aptr++ = ipc;
         1266                         if(aptr >= &abuf[MAXADDS])
         1267                                 quit("sed: Too many reads after line %ld\n",
         1268                                         (char *) lnum);
         1269                         *aptr = 0;
         1270                         break;
         1271                 case SCOM:
         1272                         i = substitute(ipc);
         1273                         if(i && ipc->pfl)
         1274                                 if(ipc->pfl == 1)
         1275                                         putline(&fout, linebuf, spend-linebuf);
         1276                                 else
         1277                                         goto cpcom;
         1278                         if(i && ipc->fcode)
         1279                                 goto wcom;
         1280                         break;
         1281 
         1282                 case TCOM:
         1283                         if(sflag == 0)        break;
         1284                         sflag = 0;
         1285                         jflag = 1;
         1286                         break;
         1287 
         1288                 wcom:
         1289                 case WCOM:
         1290                         putline(ipc->fcode,linebuf, spend-linebuf);
         1291                         break;
         1292                 case XCOM:
         1293                         p1 = linebuf;
         1294                         p2 = genbuf;
         1295                         while(*p2++ = *p1++);
         1296                         p1 = holdsp;
         1297                         p2 = linebuf;
         1298                         while(*p2++ = *p1++);
         1299                         spend = p2 - 1;
         1300                         p1 = genbuf;
         1301                         p2 = holdsp;
         1302                         while(*p2++ = *p1++);
         1303                         hspend = p2 - 1;
         1304                         break;
         1305                 case YCOM:
         1306                         p1 = linebuf;
         1307                         p2 = ipc->u.text;
         1308                         for (i = *p2++;        *p1; p1++){
         1309                                 if (*p1 <= i) *p1 = p2[*p1];
         1310                         }
         1311                         break;
         1312         }
         1313 
         1314 }
         1315 
         1316 void
         1317 putline(Biobuf *bp, Rune *buf, int n)
         1318 {
         1319         while (n--)
         1320                 Bputrune(bp, *buf++);
         1321         Bputc(bp, '\n');
         1322         if(lflag)
         1323                 Bflush(bp);
         1324 }
         1325 
         1326 int
         1327 ecmp(Rune *a, Rune *b, int count)
         1328 {
         1329         while(count--)
         1330                 if(*a++ != *b++)        return(0);
         1331         return(1);
         1332 }
         1333 
         1334 void
         1335 arout(void)
         1336 {
         1337         Rune        *p1;
         1338         Biobuf        *fi;
         1339         int        c;
         1340         char        *s;
         1341         char        buf[128];
         1342 
         1343         for (aptr = abuf; *aptr; aptr++) {
         1344                 if((*aptr)->command == ACOM) {
         1345                         for(p1 = (*aptr)->u.text; *p1; p1++ )
         1346                                 Bputrune(&fout, *p1);
         1347                         Bputc(&fout, '\n');
         1348                 } else {
         1349                         for(s = buf, p1= (*aptr)->u.text; *p1; p1++)
         1350                                         s += runetochar(s, p1);
         1351                         *s = '\0';
         1352                         if((fi = Bopen(buf, OREAD)) == 0)
         1353                                 continue;
         1354                         while((c = Bgetc(fi)) >= 0)
         1355                                 Bputc(&fout, c);
         1356                         Bterm(fi);
         1357                 }
         1358         }
         1359         aptr = abuf;
         1360         *aptr = 0;
         1361 }
         1362 
         1363 void
         1364 errexit(void)
         1365 {
         1366         exits("error");
         1367 }
         1368 
         1369 void
         1370 quit (char *msg, char *arg)
         1371 {
         1372         fprint(2, "sed: ");
         1373         fprint(2, msg, arg);
         1374         fprint(2, "\n");
         1375         errexit();
         1376 }
         1377 
         1378 Rune *
         1379 gline(Rune *addr)
         1380 {
         1381         long        c;
         1382         Rune *p;
         1383 
         1384         static long peekc = 0;
         1385 
         1386         if (f == 0 && opendata() < 0)
         1387                 return 0;
         1388         sflag = 0;
         1389         lnum++;
         1390 /*        Bflush(&fout);********* dumped 4/30/92 - bobf****/
         1391         do {
         1392                 p = addr;
         1393                 for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
         1394                         if (c == '\n') {
         1395                                 if ((peekc = Bgetrune(f)) < 0) {
         1396                                         if (fhead == 0)
         1397                                                 dolflag = 1;
         1398                                 }
         1399                                 *p = '\0';
         1400                                 return p;
         1401                         }
         1402                         if (c && p < lbend)
         1403                                 *p++ = c;
         1404                 }
         1405                 /* return partial final line, adding implicit newline */
         1406                 if(p != addr) {
         1407                         *p = '\0';
         1408                         peekc = -1;
         1409                         if (fhead == 0)
         1410                                 dolflag = 1;
         1411                         return p;
         1412                 }
         1413                 peekc = 0;
         1414                 Bterm(f);
         1415         } while (opendata() > 0);        /* Switch to next stream */
         1416         f = 0;
         1417         return 0;
         1418 }
         1419 
         1420         /* Data file input section - the intent is to transparently
         1421          *        catenate all data input streams.
         1422          */
         1423 void
         1424 enroll(char *filename)                /* Add a file to the input file cache */
         1425 {
         1426         FileCache *fp;
         1427 
         1428         if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0)
         1429                 quit("Out of memory", 0);
         1430         if (ftail == 0)
         1431                 fhead = fp;
         1432         else
         1433                 ftail->next = fp;
         1434         ftail = fp;
         1435         fp->next = 0;
         1436         fp->name = filename;        /* 0 => stdin */
         1437 }
         1438 
         1439 int
         1440 opendata(void)
         1441 {
         1442         if (fhead == 0)
         1443                 return -1;
         1444         if (fhead->name) {
         1445                 if ((f = Bopen(fhead->name, OREAD)) == 0)
         1446                         quit("Can't open %s", fhead->name);
         1447         } else {
         1448                 Binit(&bstdin, 0, OREAD);
         1449                 f = &bstdin;
         1450         }
         1451         fhead = fhead->next;
         1452         return 1;
         1453 }