URI: 
       added commands as discussed with Uriel yesterday - 9base - revived minimalist port of Plan 9 userland to Unix
  HTML git clone git://git.suckless.org/9base
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit fa62640154da08c5fd229af50efde0d33871a0aa
   DIR parent 85bacddf7706d2c89c30c2433fb8c43cd794cdb5
  HTML Author: Anselm R Garbe <anselm@garbe.us>
       Date:   Fri, 28 May 2010 11:30:17 +0100
       
       added commands as discussed with Uriel yesterday
       Diffstat:
         M Makefile                            |      53 +++++++++++++++++++++++++++++--
         D TODO                                |      11 -----------
         A ascii/Makefile                      |      10 ++++++++++
         A ascii/ascii.1                       |     160 +++++++++++++++++++++++++++++++
         A ascii/ascii.c                       |     181 +++++++++++++++++++++++++++++++
         A cmp/Makefile                        |      10 ++++++++++
         A cmp/cmp.1                           |      57 +++++++++++++++++++++++++++++++
         A cmp/cmp.c                           |     112 +++++++++++++++++++++++++++++++
         A dd/Makefile                         |      10 ++++++++++
         A dd/dd.1                             |       0 
         A dd/dd.c                             |     660 +++++++++++++++++++++++++++++++
         A diff/Makefile                       |      35 +++++++++++++++++++++++++++++++
         A diff/diff.1                         |     163 +++++++++++++++++++++++++++++++
         A diff/diff.h                         |      27 +++++++++++++++++++++++++++
         A diff/diffdir.c                      |     113 +++++++++++++++++++++++++++++++
         A diff/diffio.c                       |     387 +++++++++++++++++++++++++++++++
         A diff/diffreg.c                      |     420 +++++++++++++++++++++++++++++++
         A diff/main.c                         |     270 +++++++++++++++++++++++++++++++
         A join/Makefile                       |      10 ++++++++++
         A join/join.1                         |     147 +++++++++++++++++++++++++++++++
         A join/join.c                         |     369 ++++++++++++++++++++++++++++++
         M lib9/utf.h                          |       3 ++-
         A look/Makefile                       |      10 ++++++++++
         A look/look.1                         |      85 +++++++++++++++++++++++++++++++
         A look/look.c                         |     349 +++++++++++++++++++++++++++++++
         A pbd/Makefile                        |      10 ++++++++++
         A pbd/pbd.1                           |       0 
         A pbd/pbd.c                           |      19 +++++++++++++++++++
         M rc/Makefile                         |       2 +-
         A split/Makefile                      |      10 ++++++++++
         A split/split.1                       |      82 +++++++++++++++++++++++++++++++
         A split/split.c                       |     189 +++++++++++++++++++++++++++++++
         A strings/Makefile                    |      10 ++++++++++
         A strings/strings.1                   |      28 ++++++++++++++++++++++++++++
         A strings/strings.c                   |      90 +++++++++++++++++++++++++++++++
         A unicode/Makefile                    |      10 ++++++++++
         A unicode/unicode.1                   |       0 
         A unicode/unicode.c                   |     122 +++++++++++++++++++++++++++++++
         A unutf/Makefile                      |      10 ++++++++++
         A unutf/unutf.1                       |       0 
         A unutf/unutf.c                       |      20 ++++++++++++++++++++
       
       41 files changed, 4238 insertions(+), 16 deletions(-)
       ---
   DIR diff --git a/Makefile b/Makefile
       @@ -2,9 +2,56 @@
        
        include config.mk
        
       -SUBDIRS  = lib9 yacc awk basename bc cal cat cleanname date dc du dd echo ed \
       -           factor fortune fmt freq getflags grep hoc ls mk mkdir mtime primes \
       -           rc read sha1sum sed seq sleep sort tail tee test touch tr troff uniq
       +SUBDIRS  = lib9\
       +        yacc\
       +        ascii\
       +        awk\
       +        basename\
       +        bc\
       +        cal\
       +        cat\
       +        cleanname\
       +        cmp\
       +        date\
       +        dc\
       +        du\
       +        dd\
       +        diff\
       +        echo\
       +        ed\
       +        factor\
       +        fortune\
       +        fmt\
       +        freq\
       +        getflags\
       +        grep\
       +        hoc\
       +        join\
       +        look\
       +        ls\
       +        mk\
       +        mkdir\
       +        mtime\
       +        pbd\
       +        primes\
       +        rc\
       +        read\
       +        sha1sum\
       +        sed\
       +        seq\
       +        sleep\
       +        sort\
       +        split\
       +        strings\
       +        tail\
       +        tee\
       +        test\
       +        touch\
       +        tr\
       +        troff\
       +        unicode\
       +        uniq\
       +        unutf\
        
        all:
                @echo 9base build options:
   DIR diff --git a/TODO b/TODO
       @@ -1,11 +0,0 @@
       -12:13 < uriel> garbeam: add dd and diff too
       -12:13 < uriel> and split
       -12:14 < uriel> (and join)
       -12:15 < uriel> and unutf (which I just noticed, seems to be undocumented, but seems quite useful too)
       -12:15 < uriel> and tcs
       -12:16 < uriel> and strings
       -12:18 < uriel> oh, oh, I'm finding some great bits:
       -12:18 < uriel> look(1), ascii(1) and unicode(1)
       -12:19 < uriel> ok, and cmp(1) is missing too
       -12:23 < uriel> hah! plan9/src/cmd/index/ is really interesting (but not worth including)
       -12:26 < uriel> oh! pbd! what a wonderful discovery, we certainly should add it too
   DIR diff --git a/ascii/Makefile b/ascii/Makefile
       @@ -0,0 +1,10 @@
       +# ascii - ascii unix port from plan9
       +# Depends on ../lib9
       +
       +TARG      = ascii
       +
       +include ../std.mk
       +
       +pre-uninstall:
       +
       +post-install:
   DIR diff --git a/ascii/ascii.1 b/ascii/ascii.1
       @@ -0,0 +1,160 @@
       +.TH ASCII 1 
       +.SH NAME
       +ascii, unicode \- interpret ASCII, Unicode characters
       +.SH SYNOPSIS
       +.B ascii
       +[
       +.B -8
       +]
       +[
       +.BI -oxdb n
       +]
       +[
       +.B -nct
       +]
       +[
       +.I text
       +]
       +.PP
       +.B unicode
       +[
       +.B -nt
       +]
       +.IB hexmin - hexmax
       +.PP
       +.B unicode
       +[
       +.B -t
       +]
       +.I hex
       +[
       +\&...
       +]
       +.PP
       +.B unicode
       +[
       +.B -n
       +]
       +.I characters
       +.PP
       +.B look
       +.I hex
       +.B \*9/lib/unicode
       +.SH DESCRIPTION
       +.I Ascii
       +prints the
       +.SM ASCII 
       +values corresponding to characters and
       +.I vice
       +.IR versa ;
       +under the
       +.B -8
       +option, the
       +.SM ISO
       +Latin-1 extensions (codes 0200-0377) are included.
       +The values are interpreted in a settable numeric base;
       +.B -o
       +specifies octal,
       +.B -d
       +decimal,
       +.B -x
       +hexadecimal (the default), and
       +.BI -b n
       +base
       +.IR n .
       +.PP
       +With no arguments,
       +.I ascii
       +prints a table of the character set in the specified base.
       +Characters of
       +.I text
       +are converted to their
       +.SM ASCII 
       +values, one per line. If, however, the first
       +.I text
       +argument is a valid number in the specified base, conversion
       +goes the opposite way.
       +Control characters are printed as two- or three-character mnemonics.
       +Other options are:
       +.TP
       +.B -n
       +Force numeric output.
       +.TP
       +.B -c
       +Force character output.
       +.TP
       +.B -t
       +Convert from numbers to running text; do not interpret
       +control characters or insert newlines.
       +.PP
       +.I Unicode
       +is similar; it converts between
       +.SM UTF
       +and character values from the Unicode Standard (see
       +.IR utf (7)).
       +If given a range of hexadecimal numbers,
       +.I unicode
       +prints a table of the specified Unicode characters \(em their values and
       +.SM UTF
       +representations.
       +Otherwise it translates from
       +.SM UTF
       +to numeric value or vice versa,
       +depending on the appearance of the supplied text;
       +the
       +.B -n
       +option forces numeric output to avoid ambiguity with numeric characters.
       +If converting to
       +.SM UTF ,
       +the characters are printed one per line unless the
       +.B -t
       +flag is set, in which case the output is a single string
       +containing only the specified characters.
       +Unlike
       +.IR ascii ,
       +.I unicode
       +treats no characters specially.
       +.PP
       +The output of
       +.I ascii
       +and
       +.I unicode
       +may be unhelpful if the characters printed are not available in the current font.
       +.PP
       +The file
       +.B \*9/lib/unicode
       +contains a
       +table of characters and descriptions, sorted in hexadecimal order,
       +suitable for
       +.IR look (1)
       +on the lower case
       +.I hex
       +values of characters.
       +.SH EXAMPLES
       +.TP
       +.B "ascii -d"
       +Print the
       +.SM ASCII 
       +table base 10.
       +.TP
       +.B "unicode p"
       +Print the hex value of `p'.
       +.TP
       +.B "unicode 2200-22f1"
       +Print a table of miscellaneous mathematical symbols.
       +.TP
       +.B "look 039 \*9/lib/unicode"
       +See the start of the Greek alphabet's encoding in the Unicode Standard.
       +.SH FILES
       +.TP
       +.B \*9/lib/unicode
       +table of characters and descriptions.
       +.SH SOURCE
       +.B \*9/src/cmd/ascii.c
       +.br
       +.B \*9/src/cmd/unicode.c
       +.SH "SEE ALSO"
       +.IR look (1),
       +.IR tcs (1),
       +.IR utf (7),
       +.IR font (7)
   DIR diff --git a/ascii/ascii.c b/ascii/ascii.c
       @@ -0,0 +1,181 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +
       +#define        MAXBASE        36
       +
       +void        usage(void);
       +void        put(int);
       +void        putn(int, int);
       +void        puttext(char *);
       +void        putnum(char *);
       +int        btoi(char *);
       +int        value(int, int);
       +int        isnum(char *);
       +
       +char *str[256]={
       +        "nul",        "soh",        "stx",        "etx",        "eot",        "enq",        "ack",        "bel",
       +        "bs ",        "ht ",        "nl ",        "vt ",        "np ",        "cr ",        "so ",        "si ",
       +        "dle",        "dc1",        "dc2",        "dc3",        "dc4",        "nak",        "syn",        "etb",
       +        "can",        "em ",        "sub",        "esc",        "fs ",        "gs ",        "rs ",        "us ",
       +        "sp ",        " ! ",        " \" ",        " # ",        " $ ",        " % ",        " & ",        " ' ",
       +        " ( ",        " ) ",        " * ",        " + ",        " , ",        " - ",        " . ",        " / ",
       +        " 0 ",        " 1 ",        " 2 ",        " 3 ",        " 4 ",        " 5 ",        " 6 ",        " 7 ",
       +        " 8 ",        " 9 ",        " : ",        " ; ",        " < ",        " = ",        " > ",        " ? ",
       +        " @ ",        " A ",        " B ",        " C ",        " D ",        " E ",        " F ",        " G ",
       +        " H ",        " I ",        " J ",        " K ",        " L ",        " M ",        " N ",        " O ",
       +        " P ",        " Q ",        " R ",        " S ",        " T ",        " U ",        " V ",        " W ",
       +        " X ",        " Y ",        " Z ",        " [ ",        " \\ ",        " ] ",        " ^ ",        " _ ",
       +        " ` ",        " a ",        " b ",        " c ",        " d ",        " e ",        " f ",        " g ",
       +        " h ",        " i ",        " j ",        " k ",        " l ",        " m ",        " n ",        " o ",
       +        " p ",        " q ",        " r ",        " s ",        " t ",        " u ",        " v ",        " w ",
       +        " x ",        " y ",        " z ",        " { ",        " | ",        " } ",        " ~ ",        "del",
       +        "x80",        "x81",        "x82",        "x83",        "x84",        "x85",        "x86",        "x87",
       +        "x88",        "x89",        "x8a",        "x8b",        "x8c",        "x8d",        "x8e",        "x8f",
       +        "x90",        "x91",        "x92",        "x93",        "x94",        "x95",        "x96",        "x97",
       +        "x98",        "x99",        "x9a",        "x9b",        "x9c",        "x9d",        "x9e",        "x9f",
       +        "xa0",        " ¡ ",        " ¢ ",        " £ ",        " ¤ ",        " ¥ ",        " ¦ ",        " § ",
       +        " ¨ ",        " © ",        " ª ",        " « ",        " ¬ ",        " ­ ",        " ® ",        " ¯ ",
       +        " ° ",        " ± ",        " ² ",        " ³ ",        " ´ ",        " µ ",        " ¶ ",        " · ",
       +        " ¸ ",        " ¹ ",        " º ",        " » ",        " ¼ ",        " ½ ",        " ¾ ",        " ¿ ",
       +        " À ",        " Á ",        " Â ",        " Ã ",        " Ä ",        " Å ",        " Æ ",        " Ç ",
       +        " È ",        " É ",        " Ê ",        " Ë ",        " Ì ",        " Í ",        " Î ",        " Ï ",
       +        " Ð ",        " Ñ ",        " Ò ",        " Ó ",        " Ô ",        " Õ ",        " Ö ",        " × ",
       +        " Ø ",        " Ù ",        " Ú ",        " Û ",        " Ü ",        " Ý ",        " Þ ",        " ß ",
       +        " à ",        " á ",        " â ",        " ã ",        " ä ",        " å ",        " æ ",        " ç ",
       +        " è ",        " é ",        " ê ",        " ë ",        " ì ",        " í ",        " î ",        " ï ",
       +        " ð ",        " ñ ",        " ò ",        " ó ",        " ô ",        " õ ",        " ö ",        " ÷ ",
       +        " ø ",        " ù ",        " ú ",        " û ",        " ü ",        " ý ",        " þ ",        " ÿ "
       +};
       +
       +char Ncol[]={        /* Ncol[b]: number of base-b digits needed to write the codes 0..127 */
       +    0,0,7,5,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
       +};
       +
       +int         nchars=128;        /* size of the printed table; -8 raises it to 256 */
       +int         base=16;        /* numeric base for input and output (-x, -o, -d, -b) */
       +int         ncol;        /* digits printed per number by putn() */
       +int         text=1;        /* 1: guess direction from first argument; 0: -n; 2: -c or -t */
       +int        strip=0;        /* set by -t: puttext() writes raw bytes without newlines */
       +Biobuf        bin;        /* buffered output; main() attaches it to fd 1 */
       +
       +void
       +main(int argc, char **argv)        /* parse flags, then print the table or convert each argument */
       +{
       +        int i;
       +
       +        Binit(&bin, 1, OWRITE);
       +        ARGBEGIN{
       +        case '8':
       +                nchars=256; break;
       +        case 'x':
       +                base=16; break;
       +        case 'o':
       +                base=8; break;
       +        case 'd':
       +                base=10; break;
       +        case 'b':
       +                base=strtoul(EARGF(usage()), 0, 0);
       +                if(base<2||base>MAXBASE)
       +                        usage();
       +                break;
       +        case 'n':
       +                text=0; break;
       +        case 't':
       +                strip=1;        /* fall through: -t also forces character output */
       +        case 'c':
       +                text=2; break;
       +        default:
       +                usage();
       +        }ARGEND
       +        /* field width = digits needed for nchars-1, so -8 codes are not truncated */
       +        for(ncol=1, i=base; i<nchars; i*=base)
       +                ncol++;
       +        if(argc==0){
       +                for(i=0;i<nchars;i++){
       +                        put(i);
       +                        if((i&7)==7)        /* eight cells per table row */
       +                                Bprint(&bin, "|\n");
       +                }
       +        }else{
       +                if(text==1)        /* no -n/-c/-t: first argument decides the direction */
       +                        text=isnum(argv[0]);
       +                while(argc--)
       +                        if(text)
       +                                puttext(*argv++);        /* number -> character */
       +                        else
       +                                putnum(*argv++);        /* characters -> numbers */
       +        }
       +        Bputc(&bin, '\n');
       +        exits(0);
       +}
       +void
       +usage(void)        /* print the synopsis on fd 2 and exit with status "usage" */
       +{
       +        fprint(2, "usage: %s [-8] [-xod | -b n] [-nct] [--] [text]\n", argv0);        /* flags match main()'s ARGBEGIN cases */
       +        exits("usage");
       +}
       +void
       +put(int i)        /* emit one table cell: '|', code i as ncol digits, a space, then str[i] */
       +{
       +        Bputc(&bin, '|');
       +        putn(i, ncol);
       +        Bprint(&bin, " %s", str[i]);
       +}
       +char dig[]="0123456789abcdefghijklmnopqrstuvwxyz";        /* digit characters, enough for base 36 */
       +void
       +putn(int n, int ndig)        /* write exactly ndig low-order digits of n in the current base */
       +{
       +        if(ndig==0)        /* width exhausted: any remaining high-order digits are dropped */
       +                return;
       +        putn(n/base, ndig-1);        /* recurse first so the most significant digit comes out first */
       +        Bputc(&bin, dig[n%base]);
       +}
       +void
       +puttext(char *s)        /* s is a number in the current base; write the character it denotes */
       +{
       +        int n;
       +        n=btoi(s)&0377;        /* keep only the low 8 bits */
       +        if(strip)
       +                Bputc(&bin, n);        /* -t: the raw byte, no newline */
       +        else
       +                Bprint(&bin, "%s\n", str[n]);        /* table name for the code, one per line */
       +}
       +void
       +putnum(char *s)        /* write the code of every byte of s as ncol digits, one per line */
       +{
       +        while(*s){
       +                putn(*s++&0377, ncol);        /* mask so the byte is used as a value in 0..255 */
       +                Bputc(&bin, '\n');
       +        }
       +}
       +int
       +btoi(char *s)        /* convert the digit string s from the current base to an int */
       +{
       +        int n;
       +        n=0;
       +        while(*s)
       +                n=n*base+value(*s++, 0);        /* f=0: value() reports a bad digit and exits */
       +        return(n);
       +}
       +int
       +value(int c, int f)        /* digit value of c in the current base; if c is not a digit: -1 when f is set, else exit */
       +{
       +        char *s;
       +        for(s=dig; s<dig+base; s++)        /* only the first `base` entries of dig are valid digits */
       +                if(*s==c)
       +                        return(s-dig);
       +        if(f)
       +                return(-1);
       +        fprint(2, "%s: bad input char %c\n", argv0, c);
       +        exits("bad");
       +        return 0;        /* to keep ken happy */
       +}
       +int
       +isnum(char *s)        /* 1 if every character of s is a digit in the current base (also for ""), else 0 */
       +{
       +        while(*s)
       +                if(value(*s++, 1)==-1)        /* f=1: ask quietly, no error exit */
       +                        return(0);
       +        return(1);
       +}
   DIR diff --git a/cmp/Makefile b/cmp/Makefile
       @@ -0,0 +1,10 @@
       +# cmp - cmp unix port from plan9
       +# Depends on ../lib9
       +
       +TARG      = cmp
       +
       +include ../std.mk
       +
       +pre-uninstall:
       +
       +post-install:
   DIR diff --git a/cmp/cmp.1 b/cmp/cmp.1
       @@ -0,0 +1,57 @@
       +.TH CMP 1 
       +.SH NAME
       +cmp \- compare two files
       +.SH SYNOPSIS
       +.B cmp
       +[
       +.B -lsL
       +]
       +.I file1 file2
       +[
       +.I offset1
       +[
       +.I offset2
       +]
       +]
       +.SH DESCRIPTION
       +The two files are
       +compared.
       +A diagnostic results if the contents differ, otherwise
       +there is no output.
       +.PP
       +The options are:
       +.TP
       +.B -l
       +Print the byte number (decimal) and the
       +differing bytes (hexadecimal) for each difference.
       +.TP
       +.B -s
       +Print nothing for differing files,
       +but set the exit status.
       +.TP
       +.B -L
       +Print the line number of the first differing byte.
       +.PP
       +If offsets are given,
       +comparison starts at the designated byte position
       +of the corresponding file.
       +Offsets that begin with
       +.B 0x
       +are hexadecimal;
       +with
       +.BR 0 ,
       +octal; with anything else, decimal.
       +.SH SOURCE
       +.B \*9/src/cmd/cmp.c
       +.SH "SEE ALSO"
       +.IR diff (1) 
       +.SH DIAGNOSTICS
       +If a file is inaccessible or missing, the exit status is
       +.LR open .
       +If the files are the same, the exit status is empty (true).
       +If they are the same except that one is longer than the other, the exit status is
       +.LR EOF .
       +Otherwise
       +.I cmp
       +reports the position of the first disagreeing byte and the exit status is
       +.LR differ .
   DIR diff --git a/cmp/cmp.c b/cmp/cmp.c
       @@ -0,0 +1,112 @@
       +#include <u.h>
       +#include <libc.h>
       +
       +#define                BUF                65536
       +
       +int sflag = 0;
       +int lflag = 0;
       +int Lflag = 0;
       +
       +static void usage(void);
       +
       +void
       +main(int argc, char *argv[])        /* cmp [-lsL] file1 file2 [offset1 [offset2]]: report where the files differ */
       +{
       +        int n, i;
       +        uchar *p, *q;
       +        uchar buf1[BUF], buf2[BUF];
       +        int f1, f2;
       +        vlong nc = 1, o, l = 1;        /* nc: number (from 1) of the next byte to compare; l: line number (from 1) */
       +        char *name1, *name2;
       +        uchar *b1s, *b1e, *b2s, *b2e;        /* [bNs, bNe) holds the bytes of file N read but not yet compared */
       +
       +        ARGBEGIN{
       +        case 's':        sflag = 1; break;
       +        case 'l':        lflag = 1; break;
       +        case 'L':        Lflag = 1; break;
       +        default:        usage();
       +        }ARGEND
       +        if(argc < 2)
       +                usage();
       +        if((f1 = open(name1 = *argv++, OREAD)) == -1){
       +                if(!sflag) perror(name1);
       +                exits("open");
       +        }
       +        if((f2 = open(name2 = *argv++, OREAD)) == -1){
       +                if(!sflag) perror(name2);
       +                exits("open");
       +        }
       +        if(*argv){        /* optional offset1; base 0 makes strtoll accept 0x.. and 0.. prefixes */
       +                o = strtoll(*argv++, 0, 0);
       +                if(seek(f1, o, 0) < 0){
       +                        if(!sflag) perror("cmp: seek by offset1");
       +                        exits("seek 1");
       +                }
       +        }
       +        if(*argv){        /* optional offset2 */
       +                o = strtoll(*argv++, 0, 0);
       +                if(seek(f2, o, 0) < 0){
       +                        if(!sflag) perror("cmp: seek by offset2");
       +                        exits("seek 2");
       +                }
       +        }
       +        if(*argv)        /* anything after offset2 is a usage error */
       +                usage();
       +        b1s = b1e = buf1;
       +        b2s = b2e = buf2;
       +        for(;;){
       +                if(b1s >= b1e){        /* file1 data used up: read more, restarting at buf1 once the end is reached */
       +                        if(b1s >= &buf1[BUF])
       +                                b1s = buf1;
       +                        n = read(f1, b1s,  &buf1[BUF] - b1s);
       +                        b1e = b1s + n;
       +                }
       +                if(b2s >= b2e){        /* same for file2 */
       +                        if(b2s >= &buf2[BUF])
       +                                b2s = buf2;
       +                        n = read(f2, b2s,  &buf2[BUF] - b2s);
       +                        b2e = b2s + n;
       +                }
       +                n = b2e - b2s;        /* compare only what both buffers currently hold */
       +                if(n > b1e - b1s)
       +                        n = b1e - b1s;
       +                if(n <= 0)        /* EOF (or read error) on at least one file */
       +                        break;
       +                if(memcmp((void *)b1s, (void *)b2s, n) != 0){
       +                        if(sflag)
       +                                exits("differ");
       +                        for(p = b1s, q = b2s, i = 0; i < n; p++, q++, i++) {
       +                                if(*p == '\n')
       +                                        l++;
       +                                if(*p != *q){
       +                                        if(!lflag){        /* default: report the first difference and stop */
       +                                                print("%s %s differ: char %lld",
       +                                                    name1, name2, nc+i);
       +                                                print(Lflag?" line %lld\n":"\n", l);
       +                                                exits("differ");
       +                                        }
       +                                        print("%6lld 0x%.2x 0x%.2x\n", nc+i, *p, *q);
       +                                }
       +                        }
       +                }
       +                if(Lflag)        /* count newlines only in the n bytes consumed now; leftovers are counted next pass */
       +                        for(p = b1s; p < b1s + n;)
       +                                if(*p++ == '\n')
       +                                        l++;
       +                nc += n;
       +                b1s += n;
       +                b2s += n;
       +        }
       +        if(b1e - b1s == b2e - b2s)        /* both files ended together */
       +                exits((char *)0);
       +        if(!sflag)
       +                print("EOF on %s\n", (b1e - b1s > b2e - b2s)? name2 : name1);        /* name the file that ran out first */
       +        exits("EOF");
       +}
       +
       +static void
       +usage(void)        /* print the synopsis with print() and exit with status "usage" */
       +{
       +        print("Usage: cmp [-lsL] file1 file2 [offset1 [offset2] ]\n");
       +        exits("usage");
       +}
   DIR diff --git a/dd/Makefile b/dd/Makefile
       @@ -0,0 +1,10 @@
       +# dd - dd unix port from plan9
       +# Depends on ../lib9
       +
       +TARG      = dd
       +
       +include ../std.mk
       +
       +pre-uninstall:
       +
       +post-install:
   DIR diff --git a/dd/dd.1 b/dd/dd.1
   DIR diff --git a/dd/dd.c b/dd/dd.c
       @@ -0,0 +1,660 @@
       +#include <u.h>
       +#include <libc.h>
       +
       +#define        BIG        2147483647        /* limit handed to number() for every numeric argument */
       +#define        LCASE        (1<<0)        /* cflag bit set by conv lcase */
       +#define        UCASE        (1<<1)        /* cflag bit set by conv ucase */
       +#define        SWAB        (1<<2)        /* cflag bit set by conv swab */
       +#define NERR        (1<<3)        /* cflag bit set by conv noerror */
       +#define SYNC        (1<<4)        /* cflag bit set by conv sync */
       +int        cflag;
       +int        fflag;
       +char        *string;
       +char        *ifile;
       +char        *ofile;
       +char        *ibuf;
       +char        *obuf;
       +vlong        skip;
       +vlong        oseekn;
       +vlong        iseekn;
       +vlong        count;
       +long        files        = 1;
       +long        ibs        = 512;
       +long        obs        = 512;
       +long        bs;
       +long        cbs;
       +long        ibc;
       +long        obc;
       +long        cbc;
       +long        nifr;
       +long        nipr;
       +long        nofr;
       +long        nopr;
       +long        ntrunc;
       +int dotrunc = 1;
       +int        ibf;
       +int        obf;
       +char        *op;
       +int        nspace;
       +uchar        etoa[256];
       +uchar        atoe[256];
       +uchar        atoibm[256];
       +
       +void        flsh(void);
       +int        match(char *s);
       +vlong        number(long big);
       +void        cnull(int cc);
       +void        null(int c);
       +void        ascii(int cc);
       +void        unblock(int cc);
       +void        ebcdic(int cc);
       +void        ibm(int cc);
       +void        block(int cc);
       +void        term(void);
       +void        stats(void);
       +
       +#define        iskey(s)        ((key[0] == '-') && (strcmp(key+1, s) == 0))        /* true when key is "-" followed by exactly s, e.g. "-if" */
       +
       +void
       +main(int argc, char *argv[])
       +{
       +        void (*conv)(int);
       +        char *ip;
       +        char *key;
       +        int a, c;
       +
       +        conv = null;
       +        for(c=1; c<argc; c++) {
       +                key = argv[c++];
       +                if(c >= argc){
       +                        fprint(2, "dd: arg %s needs a value\n", key);
       +                        exits("arg");
       +                }
       +                string = argv[c];
       +                if(iskey("ibs")) {
       +                        ibs = number(BIG);
       +                        continue;
       +                }
       +                if(iskey("obs")) {
       +                        obs = number(BIG);
       +                        continue;
       +                }
       +                if(iskey("cbs")) {
       +                        cbs = number(BIG);
       +                        continue;
       +                }
       +                if(iskey("bs")) {
       +                        bs = number(BIG);
       +                        continue;
       +                }
       +                if(iskey("if")) {
       +                        ifile = string;
       +                        continue;
       +                }
       +                if(iskey("of")) {
       +                        ofile = string;
       +                        continue;
       +                }
       +                if(iskey("trunc")) {
       +                        dotrunc = number(BIG);
       +                        continue;
       +                }
       +                if(iskey("skip")) {
       +                        skip = number(BIG);
       +                        continue;
       +                }
       +                if(iskey("seek") || iskey("oseek")) {
       +                        oseekn = number(BIG);
       +                        continue;
       +                }
       +                if(iskey("iseek")) {
       +                        iseekn = number(BIG);
       +                        continue;
       +                }
       +                if(iskey("count")) {
       +                        count = number(BIG);
       +                        continue;
       +                }
       +                if(iskey("files")) {
       +                        files = number(BIG);
       +                        continue;
       +                }
       +                if(iskey("conv")) {
       +                cloop:
       +                        if(match(","))
       +                                goto cloop;
       +                        if(*string == '\0')
       +                                continue;
       +                        if(match("ebcdic")) {
       +                                conv = ebcdic;
       +                                goto cloop;
       +                        }
       +                        if(match("ibm")) {
       +                                conv = ibm;
       +                                goto cloop;
       +                        }
       +                        if(match("ascii")) {
       +                                conv = ascii;
       +                                goto cloop;
       +                        }
       +                        if(match("block")) {
       +                                conv = block;
       +                                goto cloop;
       +                        }
       +                        if(match("unblock")) {
       +                                conv = unblock;
       +                                goto cloop;
       +                        }
       +                        if(match("lcase")) {
       +                                cflag |= LCASE;
       +                                goto cloop;
       +                        }
       +                        if(match("ucase")) {
       +                                cflag |= UCASE;
       +                                goto cloop;
       +                        }
       +                        if(match("swab")) {
       +                                cflag |= SWAB;
       +                                goto cloop;
       +                        }
       +                        if(match("noerror")) {
       +                                cflag |= NERR;
       +                                goto cloop;
       +                        }
       +                        if(match("sync")) {
       +                                cflag |= SYNC;
       +                                goto cloop;
       +                        }
       +                }
       +                fprint(2, "dd: bad arg: %s\n", key);
       +                exits("arg");
       +        }
       +        if(conv == null && cflag&(LCASE|UCASE))
       +                conv = cnull;
       +        if(ifile)
       +                ibf = open(ifile, 0);
       +        else
       +                ibf = dup(0, -1);
       +        if(ibf < 0) {
       +                fprint(2, "dd: open %s: %r\n", ifile);
       +                exits("open");
       +        }
       +        if(ofile){
       +                if(dotrunc)
       +                        obf = create(ofile, 1, 0664);
       +                else
       +                        obf = open(ofile, 1);
       +                if(obf < 0) {
       +                        fprint(2, "dd: create %s: %r\n", ofile);
       +                        exits("create");
       +                }
       +        }else{
       +                obf = dup(1, -1);
       +                if(obf < 0) {
       +                        fprint(2, "dd: can't dup file descriptor: %s: %r\n", ofile);
       +                        exits("dup");
       +                }
       +        }
       +        if(bs)
       +                ibs = obs = bs;
       +        if(ibs == obs && conv == null)
       +                fflag++;
       +        if(ibs == 0 || obs == 0) {
       +                fprint(2, "dd: counts: cannot be zero\n");
       +                exits("counts");
       +        }
       +        ibuf = sbrk(ibs);
       +        if(fflag)
       +                obuf = ibuf;
       +        else
       +                obuf = sbrk(obs);
       +        sbrk(64);        /* For good measure */
       +        if(ibuf == (char *)-1 || obuf == (char *)-1) {
       +                fprint(2, "dd: not enough memory: %r\n");
       +                exits("memory");
       +        }
       +        ibc = 0;
       +        obc = 0;
       +        cbc = 0;
       +        op = obuf;
       +
       +/*
       +        if(signal(SIGINT, SIG_IGN) != SIG_IGN)
       +                signal(SIGINT, term);
       +*/
       +        seek(obf, obs*oseekn, 1);
       +        seek(ibf, ibs*iseekn, 1);
       +        while(skip) {
       +                read(ibf, ibuf, ibs);
       +                skip--;
       +        }
       +
       +        ip = 0;
       +loop:
       +        if(ibc-- == 0) {
       +                ibc = 0;
       +                if(count==0 || nifr+nipr!=count) {
       +                        if(cflag&(NERR|SYNC))
       +                        for(ip=ibuf+ibs; ip>ibuf;)
       +                                *--ip = 0;
       +                        ibc = read(ibf, ibuf, ibs);
       +                }
       +                if(ibc == -1) {
       +                        perror("read");
       +                        if((cflag&NERR) == 0) {
       +                                flsh();
       +                                term();
       +                        }
       +                        ibc = 0;
       +                        for(c=0; c<ibs; c++)
       +                                if(ibuf[c] != 0)
       +                                        ibc = c;
       +                        stats();
       +                }
       +                if(ibc == 0 && --files<=0) {
       +                        flsh();
       +                        term();
       +                }
       +                if(ibc != ibs) {
       +                        nipr++;
       +                        if(cflag&SYNC)
       +                                ibc = ibs;
       +                } else
       +                        nifr++;
       +                ip = ibuf;
       +                c = (ibc>>1) & ~1;
       +                if(cflag&SWAB && c)
       +                do {
       +                        a = *ip++;
       +                        ip[-1] = *ip;
       +                        *ip++ = a;
       +                } while(--c);
       +                ip = ibuf;
       +                if(fflag) {
       +                        obc = ibc;
       +                        flsh();
       +                        ibc = 0;
       +                }
       +                goto loop;
       +        }
       +        c = 0;
       +        c |= *ip++;
       +        c &= 0377;
       +        (*conv)(c);
       +        goto loop;
       +}
       +
       +/*
       + * Write the obc bytes pending in obuf to obf, then reset obc.
       + * A write that does not transfer exactly obc bytes is reported with
       + * perror() and ends the run through term().
       + * nofr counts flushes of exactly obs bytes, nopr all other flushes.
       + */
       +void
       +flsh(void)
       +{
       +        int nwritten;
       +
       +        if(obc == 0)
       +                return;
       +        nwritten = write(obf, obuf, obc);
       +        if(nwritten != obc) {
       +                /* a partial write still counts as a partial output record */
       +                if(nwritten > 0)
       +                        nopr++;
       +                perror("write");
       +                term();
       +        }
       +        if(obc != obs)
       +                nopr++;
       +        else
       +                nofr++;
       +        obc = 0;
       +}
       +
       +/*
       + * Test whether s is a prefix of the text at the global cursor `string`.
       + * On success the cursor is advanced past the prefix and 1 is returned;
       + * otherwise the cursor is left untouched and 0 is returned.
       + */
       +int
       +match(char *s)
       +{
       +        char *p;
       +
       +        for(p = string; *s != '\0'; p++, s++)
       +                if(*p != *s)
       +                        return 0;
       +        string = p;
       +        return 1;
       +}
       +
       +/*
       + * Parse the numeric argument at the global cursor `string`.
       + *
       + * Syntax: a run of decimal digits followed by any number of suffixes:
       + *        k        multiply by 1024
       + *        b        multiply by 512
       + *        x        multiply by the number that follows (parsed recursively)
       + * The argument must end at the terminating NUL.
       + *
       + * Returns the value.  Exits with status "range" when the result is
       + * negative or not below `big`, and with status "arg" when an
       + * unrecognised character follows the digits.  Without the latter check
       + * the scan would run past the end of the argument string looking for a
       + * known suffix or a NUL.
       + */
       +vlong
       +number(long big)
       +{
       +        char *cs;
       +        vlong n;
       +
       +        cs = string;
       +        n = 0;
       +        while(*cs >= '0' && *cs <= '9')
       +                n = n*10 + *cs++ - '0';
       +        for(;;)
       +        switch(*cs++) {
       +
       +        case 'k':
       +                n *= 1024;
       +                continue;
       +
       +/*        case 'w':
       +                n *= sizeof(int);
       +                continue;
       +*/
       +
       +        case 'b':
       +                n *= 512;
       +                continue;
       +
       +/*        case '*':*/
       +        case 'x':
       +                string = cs;
       +                n *= number(BIG);
       +                /* fall through: the recursive call consumed the rest */
       +
       +        case '\0':
       +                if(n>=big || n<0) {
       +                        fprint(2, "dd: argument %lld out of range\n", n);
       +                        exits("range");
       +                }
       +                return n;
       +
       +        default:
       +                /* unknown suffix: stop here instead of scanning past the NUL */
       +                fprint(2, "dd: bad numeric arg: %s\n", string);
       +                exits("arg");
       +        }
       +        /* never gets here */
       +}
       +
       +/*
       + * Apply the ucase/lcase flags in cflag to one character and pass the
       + * result to null().  UCASE is applied before LCASE.
       + */
       +void
       +cnull(int cc)
       +{
       +        int ch;
       +
       +        ch = cc;
       +        if(cflag & UCASE)
       +                if('a' <= ch && ch <= 'z')
       +                        ch -= 'a' - 'A';
       +        if(cflag & LCASE)
       +                if('A' <= ch && ch <= 'Z')
       +                        ch -= 'A' - 'a';
       +        null(ch);
       +}
       +
       +/*
       + * Append one byte at the output cursor op and flush once obc reaches
       + * obs, rewinding op to the start of obuf.
       + */
       +void
       +null(int c)
       +{
       +        *op++ = c;
       +        obc++;
       +        if(obc < obs)
       +                return;
       +        flsh();
       +        op = obuf;
       +}
       +
       +/*
       + * Conversion routine that translates the input byte through etoa[]
       + * and then applies the same record handling as unblock(): pending
       + * blanks are dropped at the end of each cbs-byte record and a newline
       + * is emitted there.  etoa[] holds uchar values, so the 0377 mask in
       + * unblock() leaves the translated byte unchanged.
       + */
       +void
       +ascii(int cc)
       +{
       +        unblock(etoa[cc]);
       +}
       +
       +/*
       + * Conversion routine for fixed-length input records of cbs bytes.
       + * With cbs == 0 the byte is passed straight to cnull().  Otherwise
       + * blanks are held back in nspace and only written when a non-blank
       + * follows; after cbs input bytes a newline is written and any blanks
       + * still pending are discarded.
       + */
       +void
       +unblock(int cc)
       +{
       +        int ch;
       +
       +        ch = cc & 0377;
       +        if(cbs == 0) {
       +                cnull(ch);
       +                return;
       +        }
       +        if(ch != ' ') {
       +                /* release the blanks that turned out not to be trailing */
       +                for(; nspace > 0; nspace--)
       +                        null(' ');
       +                cnull(ch);
       +        } else
       +                nspace++;
       +        cbc++;
       +        if(cbc >= cbs) {
       +                null('\n');
       +                cbc = 0;
       +                nspace = 0;
       +        }
       +}
       +
       +/*
       + * Conversion routine that case-folds per cflag, translates through
       + * atoe[] and, when cbs is non-zero, builds records of cbs bytes:
       + * a newline pads the record with atoe[' '], and input beyond cbs bytes
       + * is dropped (ntrunc is bumped once per over-long record).
       + */
       +void
       +ebcdic(int cc)
       +{
       +        int ch;
       +
       +        ch = cc;
       +        if((cflag & UCASE) && ch >= 'a' && ch <= 'z')
       +                ch -= 'a' - 'A';
       +        if((cflag & LCASE) && ch >= 'A' && ch <= 'Z')
       +                ch -= 'A' - 'a';
       +        ch = atoe[ch];
       +        if(cbs == 0) {
       +                null(ch);
       +                return;
       +        }
       +        if(cc != '\n') {
       +                if(cbc == cbs)
       +                        ntrunc++;
       +                if(++cbc <= cbs)
       +                        null(ch);
       +                return;
       +        }
       +        /* end of line: pad the record out to cbs bytes */
       +        for(; cbc < cbs; cbc++)
       +                null(atoe[' ']);
       +        cbc = 0;
       +}
       +
       +/*
       + * Conversion routine that case-folds per cflag, translates through
       + * atoibm[] and, when cbs is non-zero, builds records of cbs bytes:
       + * a newline pads the record with atoibm[' '], and input beyond cbs
       + * bytes is dropped (ntrunc is bumped once per over-long record).
       + */
       +void
       +ibm(int cc)
       +{
       +        int ch;
       +
       +        ch = cc;
       +        if((cflag & UCASE) && ch >= 'a' && ch <= 'z')
       +                ch -= 'a' - 'A';
       +        if((cflag & LCASE) && ch >= 'A' && ch <= 'Z')
       +                ch -= 'A' - 'a';
       +        ch = atoibm[ch] & 0377;
       +        if(cbs == 0) {
       +                null(ch);
       +                return;
       +        }
       +        if(cc != '\n') {
       +                if(cbc == cbs)
       +                        ntrunc++;
       +                if(++cbc <= cbs)
       +                        null(ch);
       +                return;
       +        }
       +        /* end of line: pad the record out to cbs bytes */
       +        for(; cbc < cbs; cbc++)
       +                null(atoibm[' ']);
       +        cbc = 0;
       +}
       +
       +/*
       + * Conversion routine that case-folds per cflag and, when cbs is
       + * non-zero, turns newline-terminated lines into records of cbs bytes:
       + * a newline pads the record with blanks, and input beyond cbs bytes is
       + * dropped (ntrunc is bumped once per over-long record).
       + */
       +void
       +block(int cc)
       +{
       +        int ch;
       +
       +        ch = cc;
       +        if((cflag & UCASE) && ch >= 'a' && ch <= 'z')
       +                ch -= 'a' - 'A';
       +        if((cflag & LCASE) && ch >= 'A' && ch <= 'Z')
       +                ch -= 'A' - 'a';
       +        ch &= 0377;
       +        if(cbs == 0) {
       +                null(ch);
       +                return;
       +        }
       +        if(cc != '\n') {
       +                if(cbc == cbs)
       +                        ntrunc++;
       +                if(++cbc <= cbs)
       +                        null(ch);
       +                return;
       +        }
       +        /* end of line: pad the record out to cbs bytes */
       +        for(; cbc < cbs; cbc++)
       +                null(' ');
       +        cbc = 0;
       +}
       +
       +/*
       + * Print the record counters via stats() and leave through exits(0).
       + * NOTE(review): flsh() calls this after a failed write as well, so the
       + * error path exits with the same status as normal completion --
       + * confirm that is intended.
       + */
       +void
       +term(void)
       +{
       +
       +        stats();
       +        exits(0);
       +}
       +
       +/*
       + * Report the transfer counters on file descriptor 2:
       + * nifr+nipr as "records in", nofr+nopr as "records out", and the
       + * number of truncated records when ntrunc is non-zero.
       + */
       +void
       +stats(void)
       +{
       +
       +        fprint(2, "%lud+%lud records in\n", nifr, nipr);
       +        fprint(2, "%lud+%lud records out\n", nofr, nopr);
       +        if(ntrunc)
       +                fprint(2, "%lud truncated records\n", ntrunc);
       +}
       +
       +/*
       + * Byte translation table (256 entries, octal) indexed by the input
       + * byte in ascii().  Presumably EBCDIC-to-ASCII, going by the name.
       + */
       +uchar        etoa[] =
       +{
       +        0000,0001,0002,0003,0234,0011,0206,0177,
       +        0227,0215,0216,0013,0014,0015,0016,0017,
       +        0020,0021,0022,0023,0235,0205,0010,0207,
       +        0030,0031,0222,0217,0034,0035,0036,0037,
       +        0200,0201,0202,0203,0204,0012,0027,0033,
       +        0210,0211,0212,0213,0214,0005,0006,0007,
       +        0220,0221,0026,0223,0224,0225,0226,0004,
       +        0230,0231,0232,0233,0024,0025,0236,0032,
       +        0040,0240,0241,0242,0243,0244,0245,0246,
       +        0247,0250,0133,0056,0074,0050,0053,0041,
       +        0046,0251,0252,0253,0254,0255,0256,0257,
       +        0260,0261,0135,0044,0052,0051,0073,0136,
       +        0055,0057,0262,0263,0264,0265,0266,0267,
       +        0270,0271,0174,0054,0045,0137,0076,0077,
       +        0272,0273,0274,0275,0276,0277,0300,0301,
       +        0302,0140,0072,0043,0100,0047,0075,0042,
       +        0303,0141,0142,0143,0144,0145,0146,0147,
       +        0150,0151,0304,0305,0306,0307,0310,0311,
       +        0312,0152,0153,0154,0155,0156,0157,0160,
       +        0161,0162,0313,0314,0315,0316,0317,0320,
       +        0321,0176,0163,0164,0165,0166,0167,0170,
       +        0171,0172,0322,0323,0324,0325,0326,0327,
       +        0330,0331,0332,0333,0334,0335,0336,0337,
       +        0340,0341,0342,0343,0344,0345,0346,0347,
       +        0173,0101,0102,0103,0104,0105,0106,0107,
       +        0110,0111,0350,0351,0352,0353,0354,0355,
       +        0175,0112,0113,0114,0115,0116,0117,0120,
       +        0121,0122,0356,0357,0360,0361,0362,0363,
       +        0134,0237,0123,0124,0125,0126,0127,0130,
       +        0131,0132,0364,0365,0366,0367,0370,0371,
       +        0060,0061,0062,0063,0064,0065,0066,0067,
       +        0070,0071,0372,0373,0374,0375,0376,0377,
       +};
       +/*
       + * Byte translation table (256 entries, octal) indexed by the
       + * case-folded input byte in ebcdic(); atoe[' '] is also the pad byte
       + * there.  Presumably ASCII-to-EBCDIC, going by the name.
       + */
       +uchar        atoe[] =
       +{
       +        0000,0001,0002,0003,0067,0055,0056,0057,
       +        0026,0005,0045,0013,0014,0015,0016,0017,
       +        0020,0021,0022,0023,0074,0075,0062,0046,
       +        0030,0031,0077,0047,0034,0035,0036,0037,
       +        0100,0117,0177,0173,0133,0154,0120,0175,
       +        0115,0135,0134,0116,0153,0140,0113,0141,
       +        0360,0361,0362,0363,0364,0365,0366,0367,
       +        0370,0371,0172,0136,0114,0176,0156,0157,
       +        0174,0301,0302,0303,0304,0305,0306,0307,
       +        0310,0311,0321,0322,0323,0324,0325,0326,
       +        0327,0330,0331,0342,0343,0344,0345,0346,
       +        0347,0350,0351,0112,0340,0132,0137,0155,
       +        0171,0201,0202,0203,0204,0205,0206,0207,
       +        0210,0211,0221,0222,0223,0224,0225,0226,
       +        0227,0230,0231,0242,0243,0244,0245,0246,
       +        0247,0250,0251,0300,0152,0320,0241,0007,
       +        0040,0041,0042,0043,0044,0025,0006,0027,
       +        0050,0051,0052,0053,0054,0011,0012,0033,
       +        0060,0061,0032,0063,0064,0065,0066,0010,
       +        0070,0071,0072,0073,0004,0024,0076,0341,
       +        0101,0102,0103,0104,0105,0106,0107,0110,
       +        0111,0121,0122,0123,0124,0125,0126,0127,
       +        0130,0131,0142,0143,0144,0145,0146,0147,
       +        0150,0151,0160,0161,0162,0163,0164,0165,
       +        0166,0167,0170,0200,0212,0213,0214,0215,
       +        0216,0217,0220,0232,0233,0234,0235,0236,
       +        0237,0240,0252,0253,0254,0255,0256,0257,
       +        0260,0261,0262,0263,0264,0265,0266,0267,
       +        0270,0271,0272,0273,0274,0275,0276,0277,
       +        0312,0313,0314,0315,0316,0317,0332,0333,
       +        0334,0335,0336,0337,0352,0353,0354,0355,
       +        0356,0357,0372,0373,0374,0375,0376,0377,
       +};
       +/*
       + * Byte translation table (256 entries, octal) indexed by the
       + * case-folded input byte in ibm(); atoibm[' '] is also the pad byte
       + * there.  It appears to differ from atoe[] only at indices 041, 0133,
       + * 0135 and 0174 ('!', '[', ']' and '|' in ASCII).
       + */
       +uchar        atoibm[] =
       +{
       +        0000,0001,0002,0003,0067,0055,0056,0057,
       +        0026,0005,0045,0013,0014,0015,0016,0017,
       +        0020,0021,0022,0023,0074,0075,0062,0046,
       +        0030,0031,0077,0047,0034,0035,0036,0037,
       +        0100,0132,0177,0173,0133,0154,0120,0175,
       +        0115,0135,0134,0116,0153,0140,0113,0141,
       +        0360,0361,0362,0363,0364,0365,0366,0367,
       +        0370,0371,0172,0136,0114,0176,0156,0157,
       +        0174,0301,0302,0303,0304,0305,0306,0307,
       +        0310,0311,0321,0322,0323,0324,0325,0326,
       +        0327,0330,0331,0342,0343,0344,0345,0346,
       +        0347,0350,0351,0255,0340,0275,0137,0155,
       +        0171,0201,0202,0203,0204,0205,0206,0207,
       +        0210,0211,0221,0222,0223,0224,0225,0226,
       +        0227,0230,0231,0242,0243,0244,0245,0246,
       +        0247,0250,0251,0300,0117,0320,0241,0007,
       +        0040,0041,0042,0043,0044,0025,0006,0027,
       +        0050,0051,0052,0053,0054,0011,0012,0033,
       +        0060,0061,0032,0063,0064,0065,0066,0010,
       +        0070,0071,0072,0073,0004,0024,0076,0341,
       +        0101,0102,0103,0104,0105,0106,0107,0110,
       +        0111,0121,0122,0123,0124,0125,0126,0127,
       +        0130,0131,0142,0143,0144,0145,0146,0147,
       +        0150,0151,0160,0161,0162,0163,0164,0165,
       +        0166,0167,0170,0200,0212,0213,0214,0215,
       +        0216,0217,0220,0232,0233,0234,0235,0236,
       +        0237,0240,0252,0253,0254,0255,0256,0257,
       +        0260,0261,0262,0263,0264,0265,0266,0267,
       +        0270,0271,0272,0273,0274,0275,0276,0277,
       +        0312,0313,0314,0315,0316,0317,0332,0333,
       +        0334,0335,0336,0337,0352,0353,0354,0355,
       +        0356,0357,0372,0373,0374,0375,0376,0377,
       +};
   DIR diff --git a/diff/Makefile b/diff/Makefile
       @@ -0,0 +1,35 @@
       +# diff - file comparison utility, Unix port from Plan 9
       +# Depends on ../lib9
       +
       +TARG      = diff
       +OFILES    = diffdir.o diffio.o diffreg.o main.o
       +MANFILES  = diff.1
       +
       +include ../config.mk
       +
       +all: ${TARG}
       +        @strip ${TARG}
       +        @echo built ${TARG}
       +
       +install: ${TARG}
       +        @mkdir -p ${DESTDIR}${PREFIX}/bin
       +        @cp -f ${TARG} ${DESTDIR}${PREFIX}/bin/
       +        @chmod 755 ${DESTDIR}${PREFIX}/bin/${TARG}
       +        @mkdir -p ${DESTDIR}${MANPREFIX}/man1
       +        @cp -f ${MANFILES} ${DESTDIR}${MANPREFIX}/man1
       +        @chmod 444 ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
       +
       +# Remove exactly what install put in place; the manual page lives under
       +# MANPREFIX, not PREFIX.
       +uninstall:
       +        rm -f ${DESTDIR}${PREFIX}/bin/${TARG}
       +        rm -f ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
       +
       +.c.o:
       +        @echo CC $*.c
       +        @${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include $*.c
       +
       +clean:
       +        rm -f ${OFILES} ${TARG}
       +
       +${TARG}: ${OFILES}
       +        @echo LD ${TARG}
       +        @${CC} ${LDFLAGS} -o ${TARG} ${OFILES} -lm -L${PREFIX}/lib -L../lib9 -l9
   DIR diff --git a/diff/diff.1 b/diff/diff.1
       @@ -0,0 +1,163 @@
       +.TH DIFF 1 
       +.SH NAME
       +diff \- differential file comparator
       +.SH SYNOPSIS
       +.B diff
       +[
       +.B -acefmnbwr
       +] file1 ... file2
       +.SH DESCRIPTION
       +.I Diff
       +tells what lines must be changed in two files to bring them
       +into agreement.
       +If one file
       +is a directory,
       +then a file in that directory with basename the same as that of
       +the other file is used.
       +If both files are directories, similarly named files in the
       +two directories are compared by the method of 
       +.I diff
       +for text
       +files and
       +.IR cmp (1)
       +otherwise.
       +If more than two file names are given, then each argument is compared
       +to the last argument as above.
       +The 
       +.B -r
       +option causes
       +.I diff
       +to process similarly named subdirectories recursively.
       +When processing more than one file, 
       +.I diff
       +prefixes file differences with a single line
       +listing the two differing files, in the form of
       +a 
       +.I diff
       +command line.
       +The
       +.B -m
       +flag causes this behavior even when processing single files.
       +.PP
       +The normal output contains lines of these forms:
       +.IP "" 5
       +.I n1
       +.B a
       +.I n3,n4
       +.br
       +.I n1,n2
       +.B d
       +.I n3
       +.br
       +.I n1,n2
       +.B c
       +.I n3,n4
       +.PP
       +These lines resemble
       +.I ed
       +commands to convert
       +.I file1
       +into
       +.IR file2 .
       +The numbers after the letters pertain to
       +.IR file2 .
       +In fact, by exchanging `a' for `d' and reading backward
       +one may ascertain equally how to convert 
       +.I file2
       +into
       +.IR file1 .
       +As in 
       +.IR ed ,
       +identical pairs where
       +.I n1
       +=
       +.I n2
       +or
       +.I n3
       +=
       +.I n4
       +are abbreviated as a single number.
       +.PP
       +Following each of these lines come all the lines that are
       +affected in the first file flagged by `<', 
       +then all the lines that are affected in the second file
       +flagged by `>'.
       +.PP
       +The
       +.B -b
       +option causes
       +trailing blanks (spaces and tabs) to be ignored
       +and other strings of blanks to compare equal.
       +The
       +.B -w
       +option causes all white-space to be removed from input lines
       +before applying the difference algorithm.
       +.PP
       +The
       +.B -n
       +option prefixes each range with 
       +.IB file : \fR
       +and inserts a space around the 
       +.BR a ,
       +.BR c ,
       +and
       +.B d
       +verbs.
       +The
       +.B -e
       +option produces a script of
       +.I "a, c"
       +and 
       +.I d
       +commands for the editor
       +.IR ed ,
       +which will recreate
       +.I file2
       +from
       +.IR file1 .
       +The
       +.B -f
       +option produces a similar script,
       +not useful with
       +.IR ed ,
       +in the opposite order. It may, however, be
       +useful as input to a stream-oriented post-processor.
       +.PP
       +The
       +.B -c
       +option includes three lines of context around each
       +change, merging changes whose contexts overlap.
       +The
       +.B -a
       +flag displays the entire file as context.
       +.PP
       +Except in rare circumstances,
       +.I diff
       +finds a smallest sufficient set of file
       +differences.
       +.SH FILES
       +.B /tmp/diff[12]
       +.SH SOURCE
       +.B \*9/src/cmd/diff
       +.SH "SEE ALSO"
       +.IR cmp (1),
       +.IR comm (1),
       +.IR ed (1)
       +.SH DIAGNOSTICS
       +Exit status is the empty string
       +for no differences,
       +.L some
       +for some, 
       +and
       +.L error
       +for trouble.
       +.SH BUGS
       +Editing scripts produced under the
       +.BR -e " or"
       +.BR -f " option are naive about"
       +creating lines consisting of a single `\fB.\fR'.
       +.PP
       +When running
       +.I diff
       +on directories, the notion of what is a text
       +file is open to debate.
   DIR diff --git a/diff/diff.h b/diff/diff.h
       @@ -0,0 +1,27 @@
       +/*
       + * Shared declarations for the diff sources (included by diffdir.c and
       + * diffio.c, among others).
       + */
       +
       +/* every use of `stdout' in these files refers to the Biobuf `bstdout' */
       +#define stdout bstdout
       +
       +/*
       + * NOTE(review): the five variables below are defined here without
       + * `extern', so every file including this header carries its own
       + * tentative definition.  That links only where common symbols are
       + * allowed (it fails with -fno-common); confirm the build flags or move
       + * the definitions into a single .c file.
       + */
       +char mode;                        /* '\0', 'e', 'f', 'h'; diffdir.c also tests for 'n' */
       +char bflag;                        /* ignore multiple and trailing blanks */
       +char rflag;                        /* recurse down directory trees */
       +char mflag;                        /* pseudo flag: doing multiple files, one dir */
       +int anychange;
       +extern Biobuf        stdout;
       +extern int        binary;
       +
       +/* typed wrappers around emalloc/erealloc/free; n is an element count */
       +#define MALLOC(t, n)                ((t *)emalloc((n)*sizeof(t)))
       +#define REALLOC(p, t, n)        ((t *)erealloc((void *)(p), (n)*sizeof(t)))
       +#define FREE(p)                        free((void *)(p))
       +
       +/* path buffers in diffdir.c are sized MAXPATHLEN+1 */
       +#define MAXPATHLEN        1024
       +
       +int mkpathname(char *, char *, char *);
       +void *emalloc(unsigned);
       +void *erealloc(void *, unsigned);
       +void diff(char *, char *, int);
       +void diffdir(char *, char *, int);
       +void diffreg(char *, char *);
       +Biobuf *prepare(int, char *);
       +void panic(int, char *, ...);
       +void check(Biobuf *, Biobuf *);
       +void change(int, int, int, int);
       +void flushchanges(void);
   DIR diff --git a/diff/diffdir.c b/diff/diffdir.c
       @@ -0,0 +1,113 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include "diff.h"
       +
       +/* qsort comparator: orders two (char *) array slots by strcmp of the strings */
       +static int
       +itemcmp(const void *v1, const void *v2)
       +{
       +        char *lhs, *rhs;
       +
       +        lhs = *(char *const *)v1;
       +        rhs = *(char *const *)v2;
       +        return strcmp(lhs, rhs);
       +}
       +
       +/*
       + * Read directory `name' and return its entry names as a sorted,
       + * 0-terminated array of freshly allocated strings.  If the directory
       + * cannot be opened, panic() is called and nil is returned.
       + */
       +static char **
       +scandir(char *name)
       +{
       +        char **names;
       +        Dir *ents;
       +        int fd, nent, count;
       +
       +        fd = open(name, OREAD);
       +        if (fd < 0){
       +                panic(mflag ? 0 : 2, "can't open %s\n", name);
       +                return nil;
       +        }
       +        names = 0;
       +        count = 0;
       +        nent = dirreadall(fd, &ents);
       +        if (nent > 0){
       +                /* copy the names last-to-first; the array is sorted below */
       +                for (nent--; nent >= 0; nent--) {
       +                        names = REALLOC(names, char *, (count+1));
       +                        names[count] = MALLOC(char, strlen(ents[nent].name)+1);
       +                        strcpy(names[count], ents[nent].name);
       +                        count++;
       +                }
       +                free(ents);
       +        }
       +        /* room for the terminating 0 entry */
       +        names = REALLOC(names, char*, (count+1));
       +        names[count] = 0;
       +        close(fd);
       +        qsort((char *)names, count, sizeof(char*), itemcmp);
       +        return names;
       +}
       +
       +/* Return 1 when p is exactly "." or "..", 0 otherwise. */
       +static int
       +isdotordotdot(char *p)
       +{
       +        if (p[0] != '.')
       +                return 0;
       +        if (p[1] == '\0')
       +                return 1;
       +        return p[1] == '.' && p[2] == '\0';
       +}
       +
       +/*
       + * Compare directories f and t.
       + *
       + * Both directories are listed with scandir(), which yields sorted name
       + * arrays, and the two lists are walked in step.  "." and ".." are
       + * skipped.  A name present on one side only is reported as
       + * "Only in <dir>: <name>" when mode is 0 or 'n'.  A name present on
       + * both sides is handed to diff() with level+1.
       + *
       + * If mkpathname() reports failure (non-zero) for either side, the
       + * entry is skipped.  Both cursors are advanced in that case too;
       + * leaving them in place would retry the same pair forever.
       + */
       +void
       +diffdir(char *f, char *t, int level)
       +{
       +        char  **df, **dt, **dirf, **dirt;
       +        char *from, *to;
       +        int res;
       +        char fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
       +
       +        df = scandir(f);
       +        dt = scandir(t);
       +        dirf = df;
       +        dirt = dt;
       +        if(df == nil || dt == nil)
       +                goto Out;
       +        while (*df || *dt) {
       +                from = *df;
       +                to = *dt;
       +                if (from && isdotordotdot(from)) {
       +                        df++;
       +                        continue;
       +                }
       +                if (to && isdotordotdot(to)) {
       +                        dt++;
       +                        continue;
       +                }
       +                /* an exhausted list sorts after everything in the other one */
       +                if (!from)
       +                        res = 1;
       +                else if (!to)
       +                        res = -1;
       +                else
       +                        res = strcmp(from, to);
       +                if (res < 0) {
       +                        if (mode == 0 || mode == 'n')
       +                                Bprint(&stdout, "Only in %s: %s\n", f, from);
       +                        df++;
       +                        continue;
       +                }
       +                if (res > 0) {
       +                        if (mode == 0 || mode == 'n')
       +                                Bprint(&stdout, "Only in %s: %s\n", t, to);
       +                        dt++;
       +                        continue;
       +                }
       +                /* same name on both sides; skip the pair if a path cannot be built */
       +                if (mkpathname(fb, f, from) == 0 && mkpathname(tb, t, to) == 0)
       +                        diff(fb, tb, level+1);
       +                df++; dt++;
       +        }
       +Out:
       +        /* either list may be nil here when its scandir() failed */
       +        for (df = dirf; df && *df; df++)
       +                FREE(*df);
       +        for (dt = dirt; dt && *dt; dt++)
       +                FREE(*dt);
       +        FREE(dirf);
       +        FREE(dirt);
       +}
   DIR diff --git a/diff/diffio.c b/diff/diffio.c
       @@ -0,0 +1,387 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include <ctype.h>
       +#include "diff.h"
       +
       +/* per-line record; prepare() stores the readhash() result in `value' */
       +struct line {
       +        int        serial;
       +        int        value;
       +};
       +/* defined in another file; prepare() fills file[i] and len[i] for input i */
       +extern struct line *file[2];
       +extern int len[2];
       +extern long *ixold, *ixnew;
       +extern int *J;
       +
       +/* set by prepare(): the open Biobuf and the name of each of the two inputs */
       +static Biobuf *input[2];
       +static char *file1, *file2;
       +/* reset to 0 by prepare() when the first input (i == 0) is set up */
       +static int firstchange;
       +
       +/* size of the line buffers; readline() stores at most MAXLINELEN-1 bytes */
       +#define MAXLINELEN        4096
       +#define MIN(x, y)        ((x) < (y) ? (x): (y))
       +
       +/*
       + * Read one line from bp into buf, without the newline, and
       + * NUL-terminate it.  At most MAXLINELEN-1 bytes are stored; the rest
       + * of an over-long line is read and discarded.
       + * Returns the number of bytes stored, or -1 when the input ends before
       + * any byte was read.
       + */
       +static int
       +readline(Biobuf *bp, char *buf)
       +{
       +        int ch;
       +        char *dst, *limit;
       +
       +        dst = buf;
       +        limit = buf + MAXLINELEN-1;
       +        for (;;) {
       +                ch = Bgetc(bp);
       +                if (ch < 0) {
       +                        if (dst == buf)
       +                                return -1;
       +                        break;
       +                }
       +                if (ch == '\n')
       +                        break;
       +                *dst++ = ch;
       +                if (dst >= limit)
       +                        break;
       +        }
       +        *dst = 0;
       +        /* buffer filled before the newline: swallow the remainder of the line */
       +        if (ch >= 0 && ch != '\n')
       +                while ((ch = Bgetc(bp)) >= 0 && ch != '\n')
       +                        ;
       +        return dst - buf;
       +}
       +
       +/* the hash is accumulated in a long and folded in HALFLONG-bit halves */
       +#define HALFLONG 16
       +#define low(x)        (x&((1L<<HALFLONG)-1))
       +#define high(x)        (x>>HALFLONG)
       +
       +/*
       + * Read the next line from bp into buf (via readline) and return a hash
       + * of it; returns 0 when readline reports end of input.
       + *
       + * hashing has the effect of
       + * arranging line in 7-bit bytes and then
       + * summing 1-s complement in 16-bit hunks
       + *
       + * bflag selects the white-space treatment: 0 hashes every byte, 1 skips
       + * runs of white space but advances the shift once when a non-blank
       + * follows such a run, and any other value skips white space entirely.
       + *
       + * NOTE(review): the final fold looks like it can yield 0 for a real
       + * line, and prepare() stops reading at the first 0 -- worth confirming.
       + */
       +static int
       +readhash(Biobuf *bp, char *buf)
       +{
       +        long sum;
       +        unsigned shift;
       +        char *p;
       +        int len, space;
       +
       +        sum = 1;
       +        shift = 0;
       +        if ((len = readline(bp, buf)) == -1)
       +                return 0;
       +        p = buf;
       +        switch(bflag)        /* various types of white space handling */
       +        {
       +        case 0:
       +                while (len--) {
       +                        /* the mask keeps the shift count within 0..HALFLONG-1 */
       +                        sum += (long)*p++ << (shift &= (HALFLONG-1));
       +                        shift += 7;
       +                }
       +                break;
       +        case 1:
       +                /*
       +                 * coalesce multiple white-space
       +                 */
       +                for (space = 0; len--; p++) {
       +                        if (isspace((uchar)*p)) {
       +                                space++;
       +                                continue;
       +                        }
       +                        /* a run of blanks before this byte costs one shift step */
       +                        if (space) {
       +                                shift += 7;
       +                                space = 0;
       +                        }
       +                        sum += (long)*p << (shift &= (HALFLONG-1));
       +                        shift += 7;
       +                }
       +                break;
       +        default:
       +                /*
       +                 * strip all white-space
       +                 */
       +                while (len--) {
       +                        if (isspace((uchar)*p)) {
       +                                p++;
       +                                continue;
       +                        }
       +                        sum += (long)*p++ << (shift &= (HALFLONG-1));
       +                        shift += 7;
       +                }
       +                break;
       +        }
       +        /* fold the upper half into the lower half, twice */
       +        sum = low(sum) + high(sum);
       +        return ((short)low(sum) + (short)high(sum));
       +}
       +
       +/*
       + * Open input number i (0 or 1) named arg and get it ready for
       + * comparison.
       + *
       + * Returns the open Biobuf, or 0 after calling panic() if the file
       + * cannot be opened.  When `binary' is already set, or the first bytes
       + * of the file look binary (a NUL rune or one in 0x80..0xa0), `binary'
       + * is set/kept and the Biobuf is returned without hashing.  Otherwise
       + * every line is hashed with readhash() into file[i][1..len[i]], and
       + * input[i] and file1/file2 record the stream and its name.
       + */
       +Biobuf *
       +prepare(int i, char *arg)
       +{
       +        struct line *lines;
       +        int nlines, hash;
       +        Biobuf *bp;
       +        char *scan, *stop, buf[MAXLINELEN];
       +        int nread;
       +        Rune r;
       +
       +        bp = Bopen(arg, OREAD);
       +        if (!bp) {
       +                panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
       +                return 0;
       +        }
       +        if (binary)
       +                return bp;
       +        nread = Bread(bp, buf, MIN(1024, MAXLINELEN));
       +        if (nread > 0) {
       +                /*
       +                 * binary-file heuristic: decode runes from the sample,
       +                 * stopping UTFmax bytes short of its end
       +                 */
       +                stop = buf+nread-UTFmax;
       +                for (scan = buf; scan < stop; ) {
       +                        scan += chartorune(&r, scan);
       +                        if (r == 0 || (r > 0x7f && r <= 0xa0)) {
       +                                binary++;
       +                                return bp;
       +                        }
       +                }
       +                /* text: rewind so hashing starts at the first line */
       +                Bseek(bp, 0, 0);
       +        }
       +        lines = MALLOC(struct line, 3);
       +        nlines = 0;
       +        while ((hash = readhash(bp, buf)) != 0) {
       +                nlines++;
       +                lines = REALLOC(lines, struct line, (nlines+3));
       +                lines[nlines].value = hash;
       +        }
       +        len[i] = nlines;
       +        file[i] = lines;
       +        input[i] = bp;
       +        if (i != 0)
       +                file2 = arg;
       +        else {
       +                file1 = arg;
       +                firstchange = 0;
       +        }
       +        return bp;
       +}
       +
       +/*
       + * Rewrite buf in place with its white space reduced and return the
       + * new length.  When bflag is 1, each run of white space that is
       + * followed by a non-space character becomes a single blank; for any
       + * other bflag value the white space is dropped altogether.  Trailing
       + * white space is always dropped.
       + */
       +static int
       +squishspace(char *buf)
       +{
       +        char *src, *dst;
       +        int pending;
       +
       +        pending = 0;
       +        dst = buf;
       +        for (src = buf; *src != 0; src++) {
       +                if (!isspace((uchar)*src)) {
       +                        if (pending && bflag == 1) {
       +                                *dst++ = ' ';
       +                                pending = 0;
       +                        }
       +                        *dst++ = *src;
       +                } else
       +                        pending++;
       +        }
       +        *dst = 0;
       +        return dst - buf;
       +}
       +
       +/*
       + * Re-read both files and verify the matches recorded in J against the
       + * actual line text, clearing J[f] whenever the paired lines differ
       + * (after squishspace when bflag is set).  Along the way the offset
       + * tables ixold[] and ixnew[] are filled in: entry n holds the running
       + * total of line lengths plus one per line for lines 1..n.
       + *
       + * need to fix up for unexpected EOF's
       + * (the value returned by readline is used as a length without
       + * checking for the -1 that readhash treats as end of input)
       + *
       + * NOTE(review): the first loop stops at f == len[0]-1, so the match
       + * recorded for the last line of the first file is not re-verified
       + * here -- confirm that this is intended.
       + */
       +void
       +check(Biobuf *bf, Biobuf *bt)
       +{
       +        int f, t, flen, tlen;
       +        char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
       +
       +        ixold[0] = ixnew[0] = 0;
       +        for (f = t = 1; f < len[0]; f++) {
       +                flen = readline(bf, fbuf);
       +                ixold[f] = ixold[f-1] + flen + 1;                /* ftell(bf) */
       +                if (J[f] == 0)
       +                        continue;
       +                /* advance the second file until line J[f] is in tbuf */
       +                do {
       +                        tlen = readline(bt, tbuf);
       +                        ixnew[t] = ixnew[t-1] + tlen + 1;        /* ftell(bt) */
       +                } while (t++ < J[f]);
       +                if (bflag) {
       +                        flen = squishspace(fbuf);
       +                        tlen = squishspace(tbuf);
       +                }
       +                /* hashes matched but the text does not: break the pairing */
       +                if (flen != tlen || strcmp(fbuf, tbuf))
       +                        J[f] = 0;
       +        }
       +        /* record offsets for the remaining lines of the second file */
       +        while (t < len[1]) {
       +                tlen = readline(bt, tbuf);
       +                ixnew[t] = ixnew[t-1] + tlen + 1;        /* fseek(bt) */
       +                t++;
       +        }
       +}
       +
       +/*
       + * Print a line range on stdout: the smaller of a and b, followed by
       + * separator and b when b is greater than a.
       + */
       +static void
       +range(int a, int b, char *separator)
       +{
       +        int first;
       +
       +        first = a;
       +        if (a > b)
       +                first = b;
       +        Bprint(&stdout, "%d", first);
       +        if (b > a)
       +                Bprint(&stdout, "%s%d", separator, b);
       +}
       +
       +/*
       + * Print lines a..b of the file read through bp, each prefixed with s.
       + * f is the offset table for that file (f[n-1] is where line n starts).
       + * a is raised to 1 and b is lowered to the file's line count; nothing
       + * is printed when a lies beyond the last line.
       + */
       +static void
       +fetch(long *f, int a, int b, Biobuf *bp, char *s)
       +{
       +        char buf[MAXLINELEN];
       +        int last, n;
       +
       +        if(a < 1)
       +                a = 1;
       +        last = bp == input[0] ? len[0] : len[1];
       +        if(b > last)
       +                b = last;
       +        if(a > last)
       +                return;
       +        Bseek(bp, f[a-1], 0);
       +        for (n = a; n <= b; n++) {
       +                readline(bp, buf);
       +                Bprint(&stdout, "%s%s\n", s, buf);
       +        }
       +}
       +
       +/*
       + * One change recorded by change() for the context-style modes
       + * ('c' and 'a') and printed later by flushchanges().
       + */
       +typedef struct Change Change;
       +struct Change
       +{
       +        int a;        /* first affected line in the first file */
       +        int b;        /* last affected line in the first file */
       +        int c;        /* first affected line in the second file */
       +        int d;        /* last affected line in the second file */
       +};
       +
       +Change *changes;        /* recorded changes; grown 1024 entries at a time */
       +int nchanges;                /* number of entries of changes in use */
       +
       +/*
       + * Report one difference: lines a..b of the first file against lines
       + * c..d of the second.  a > b means nothing was removed (verb 'a'),
       + * c > d means nothing was added (verb 'd'), otherwise the verb is 'c'.
       + * When both ranges are empty there is nothing to report.
       + *
       + * In modes 'c' and 'a' the change is only appended to changes[] for
       + * flushchanges(); in the other modes a header in the mode's format is
       + * printed immediately, followed by the affected lines.
       + */
       +void
       +change(int a, int b, int c, int d)
       +{
       +        char verb;
       +        char buf[4];
       +        Change *ch;
       +
       +        if (a > b && c > d)
       +                return;
       +        anychange = 1;
       +        /* with mflag, announce the file pair once before its first change */
       +        if (mflag && firstchange == 0) {
       +                if(mode) {
       +                        buf[0] = '-';
       +                        buf[1] = mode;
       +                        buf[2] = ' ';
       +                        buf[3] = '\0';
       +                } else {
       +                        buf[0] = '\0';
       +                }
       +                Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
       +                firstchange = 1;
       +        }
       +        verb = a > b ? 'a': c > d ? 'd': 'c';
       +        switch(mode) {
       +        case 'e':
       +                range(a, b, ",");
       +                Bputc(&stdout, verb);
       +                break;
       +        case 0:
       +                range(a, b, ",");
       +                Bputc(&stdout, verb);
       +                range(c, d, ",");
       +                break;
       +        case 'n':
       +                Bprint(&stdout, "%s:", file1);
       +                range(a, b, ",");
       +                Bprint(&stdout, " %c ", verb);
       +                Bprint(&stdout, "%s:", file2);
       +                range(c, d, ",");
       +                break;
       +        case 'f':
       +                Bputc(&stdout, verb);
       +                range(a, b, " ");
       +                break;
       +        case 'c':
       +        case 'a':
       +                /* queue the change; the array grows in steps of 1024 entries */
       +                if(nchanges%1024 == 0)
       +                        changes = erealloc(changes, (nchanges+1024)*sizeof(changes[0]));
       +                ch = &changes[nchanges++];
       +                ch->a = a;
       +                ch->b = b;
       +                ch->c = c;
       +                ch->d = d;
       +                return;
       +        }
       +        Bputc(&stdout, '\n');
       +        /* default and 'n' modes show the old lines, then "---" if both sides have lines */
       +        if (mode == 0 || mode == 'n') {
       +                fetch(ixold, a, b, input[0], "< ");
       +                if (a <= b && c <= d)
       +                        Bprint(&stdout, "---\n");
       +        }
       +        fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
       +        /* in the remaining modes the new text is terminated by a lone "." */
       +        if (mode != 0 && mode != 'n' && c <= d)
       +                Bprint(&stdout, ".\n");
       +}
       +
       +enum
       +{
       +        Lines = 3                /* number of lines of context shown */
       +};
       +
       +/*
       + * Return the index one past the last change that belongs in the same
       + * output group as changes[i].  Consecutive changes are grouped while
       + * the next one starts within 2*Lines lines of the end of the current
       + * one, i.e. while their context would touch or overlap.
       + *
       + * The loop only looks at changes[i+1] when that entry exists; the
       + * element after the last recorded change is never read.
       + */
       +int
       +changeset(int i)
       +{
       +        while(i+1<nchanges && changes[i].b+1+2*Lines > changes[i+1].a)
       +                i++;
       +        if(i<nchanges)
       +                return i+1;
       +        return nchanges;
       +}
       +
       +/*
       + * Print the changes queued by change() in modes 'c' and 'a', then
       + * empty the queue.  Changes are printed in groups chosen by
       + * changeset(); each group gets a "file1:range - file2:range" header
       + * followed by unchanged lines (prefix "  "), lines from the first file
       + * ("- ") and lines from the second file ("+ ").  In mode 'a' a single
       + * group covering both files in full is printed.
       + */
       +void
       +flushchanges(void)
       +{
       +        int a, b, c, d, at;
       +        int i, j;
       +
       +        if(nchanges == 0)
       +                return;
       +        
       +        for(i=0; i<nchanges; ){
       +                j = changeset(i);
       +                /* widen the group by Lines of context, clamped to the files */
       +                a = changes[i].a-Lines;
       +                b = changes[j-1].b+Lines;
       +                c = changes[i].c-Lines;
       +                d = changes[j-1].d+Lines;
       +                if(a < 1)
       +                        a = 1;
       +                if(c < 1)
       +                        c = 1;
       +                if(b > len[0])
       +                        b = len[0];
       +                if(d > len[1])
       +                        d = len[1];
       +                if(mode == 'a'){
       +                        a = 1;
       +                        b = len[0];
       +                        c = 1;
       +                        d = len[1];
       +                        j = nchanges;
       +                }
       +                Bprint(&stdout, "%s:", file1);
       +                range(a, b, ",");
       +                Bprint(&stdout, " - ");
       +                Bprint(&stdout, "%s:", file2);
       +                range(c, d, ",");
       +                Bputc(&stdout, '\n');
       +                /* at is the next line of the first file still to be printed */
       +                at = a;
       +                for(; i<j; i++){
       +                        fetch(ixold, at, changes[i].a-1, input[0], "  ");
       +                        fetch(ixold, changes[i].a, changes[i].b, input[0], "- ");
       +                        fetch(ixnew, changes[i].c, changes[i].d, input[1], "+ ");
       +                        at = changes[i].b+1;
       +                }
       +                /* trailing context after the last change of the group */
       +                fetch(ixold, at, b, input[0], "  ");
       +        }
       +        nchanges = 0;
       +}
   DIR diff --git a/diff/diffreg.c b/diff/diffreg.c
       @@ -0,0 +1,420 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include "diff.h"
       +
       +/*        diff - differential file comparison
       +*
       +*        Uses an algorithm due to Harold Stone, which finds
       +*        a pair of longest identical subsequences in the two
       +*        files.
       +*
       +*        The major goal is to generate the match vector J.
       +*        J[i] is the index of the line in file1 corresponding
       +*        to line i file0. J[i] = 0 if there is no
       +*        such line in file1.
       +*
       +*        Lines are hashed so as to work in core. All potential
       +*        matches are located by sorting the lines of each file
       +*        on the hash (called value). In particular, this
       +*        collects the equivalence classes in file1 together.
       +*        Subroutine equiv replaces the value of each line in
       +*        file0 by the index of the first element of its 
       +*        matching equivalence in (the reordered) file1.
       +*        To save space equiv squeezes file1 into a single
       +*        array member in which the equivalence classes
       +*        are simply concatenated, except that their first
       +*        members are flagged by changing sign.
       +*
       +*        Next the indices that point into member are unsorted into
       +*        array class according to the original order of file0.
       +*
       +*        The cleverness lies in routine stone. This marches
       +*        through the lines of file0, developing a vector klist
       +*        of "k-candidates". At step i a k-candidate is a matched
       +*        pair of lines x,y (x in file0 y in file1) such that
       +*        there is a common subsequence of length k
       +*        between the first i lines of file0 and the first y 
       +*        lines of file1, but there is no such subsequence for
       +*        any smaller y. x is the earliest possible mate to y
       +*        that occurs in such a subsequence.
       +*
       +*        Whenever any of the members of the equivalence class of
       +*        lines in file1 matable to a line in file0 has serial number 
       +*        less than the y of some k-candidate, that k-candidate 
       +*        with the smallest such y is replaced. The new 
       +*        k-candidate is chained (via pred) to the current
       +*        k-1 candidate so that the actual subsequence can
       +*        be recovered. When a member has serial number greater
       +*        than the y of all k-candidates, the klist is extended.
       +*        At the end, the longest subsequence is pulled out
       +*        and placed in the array J by unravel.
       +*
       +*        With J in hand, the matches there recorded are
       +*        check'ed against reality to assure that no spurious
       +*        matches have crept in due to hashing. If they have,
       +*        they are broken, and "jackpot " is recorded--a harmless
       +*        matter except that a true match for a spuriously
       +*        mated line may now be unnecessarily reported as a change.
       +*
       +*        Much of the complexity of the program comes simply
       +*        from trying to minimize core utilization and
       +*        maximize the range of doable problems by dynamically
       +*        allocating what is needed and reusing what is not.
       +*        The core requirements for problems larger than somewhat
       +*        are (in words) 2*length(file0) + length(file1) +
       +*        3*(number of k-candidates installed),  typically about
       +*        6n words for files of length n. 
       +*/
       +/* TIDY THIS UP */
       +/* a k-candidate: a matched pair of lines plus a link to its predecessor */
       +struct cand {
       +        int x;                /* line number in file0 */
       +        int y;                /* line number in file1 */
       +        int pred;        /* index in clist of the preceding candidate */
       +} cand;
       +/* one input line: its position and its hash */
       +struct line {
       +        int serial;        /* line index, assigned by prune */
       +        int value;        /* hash from readhash; rewritten by equiv for file0 */
       +} *file[2], line;
       +int len[2];                /* number of lines in each file */
       +int binary;                /* set by prepare when a file looks binary */
       +struct line *sfile[2];        /*shortened by pruning common prefix and suffix*/
       +int slen[2];
       +int pref, suff;        /*length of prefix and suffix*/
       +int *class;        /*will be overlaid on file[0]*/
       +int *member;        /*will be overlaid on file[1]*/
       +int *klist;                /*will be overlaid on file[0] after class*/
       +struct cand *clist;        /* merely a free storage pot for candidates */
       +int clen;                /* number of candidates in clist */
       +int *J;                /*will be overlaid on class*/
       +long *ixold;        /*will be overlaid on klist*/
       +long *ixnew;        /*will be overlaid on file[1]*/
       +/* END OF SOME TIDYING */
       +
       +/*
       + * Sort a[1..n] in place into increasing order of value, with ties
       + * broken by increasing serial.  a[0] is not part of the sorted range.
       + * The gaps used are of the form 2^k - 1, starting from the largest
       + * such value derived from n and halving down to 1.
       + */
       +static void        
       +sort(struct line *a, int n)        /*shellsort CACM #201*/
       +{
       +        int m;
       +        struct line *ai, *aim, *j, *k;
       +        struct line w;
       +        int i;
       +
       +        /* m = 2*i - 1 for the largest power of two i that is <= n */
       +        m = 0;
       +        for (i = 1; i <= n; i *= 2)
       +                m = 2*i - 1;
       +        for (m /= 2; m != 0; m /= 2) {
       +                /* k is the last element that still has a partner m places on */
       +                k = a+(n-m);
       +                for (j = a+1; j <= k; j++) {
       +                        ai = j;
       +                        aim = ai+m;
       +                        /* sift the pair downwards in steps of m until it is in order */
       +                        do {
       +                                if (aim->value > ai->value ||
       +                                   aim->value == ai->value &&
       +                                   aim->serial > ai->serial)
       +                                        break;
       +                                w = *ai;
       +                                *ai = *aim;
       +                                *aim = w;
       +
       +                                aim = ai;
       +                                ai -= m;
       +                        } while (ai > a && aim >= ai);
       +                }
       +        }
       +}
       +
       +/*
       + * Undo the sort of f[1..l]: store each line's value into b at the
       + * position given by its serial, so that b[1..l] is back in original
       + * line order.  The values are collected in a scratch array first;
       + * diffreg passes a b that shares storage with f.
       + */
       +static void
       +unsort(struct line *f, int l, int *b)
       +{
       +        int *byserial;
       +        int k;
       +
       +        byserial = MALLOC(int, (l+1));
       +        k = 1;
       +        while(k <= l) {
       +                byserial[f[k].serial] = f[k].value;
       +                k++;
       +        }
       +        k = 1;
       +        while(k <= l) {
       +                b[k] = byserial[k];
       +                k++;
       +        }
       +        FREE(byserial);
       +}
       +
       +/*
       + * Measure the run of equal hashes at the start (pref) and at the end
       + * (suff) of the two files, then set sfile[] and slen[] to describe the
       + * part in between and number its lines 0..slen in the serial field.
       + */
       +static void
       +prune(void)
       +{
       +        int k, n;
       +
       +        pref = 0;
       +        while(pref<len[0] && pref<len[1] &&
       +                file[0][pref+1].value==file[1][pref+1].value)
       +                pref++;
       +        suff = 0;
       +        while(suff<len[0]-pref && suff<len[1]-pref &&
       +                file[0][len[0]-suff].value==file[1][len[1]-suff].value)
       +                suff++;
       +        for(k=0;k<2;k++) {
       +                sfile[k] = file[k]+pref;
       +                slen[k] = len[k]-pref-suff;
       +                for(n=0;n<=slen[k];n++)
       +                        sfile[k][n].serial = n;
       +        }
       +}
       +
       +/*
       + * a[1..n] and b[1..m] are expected to be sorted by value (diffreg
       + * sorts both before calling).  Each a[i].value is replaced by the
       + * index in b of the first line with the same value, or by 0 when b has
       + * no such line.  c[1..m] then receives the serials of b, with the
       + * first member of every run of equal values negated, and c[m+1] is
       + * set to -1 as a terminator.
       + *
       + * NOTE(review): diffreg passes a c that shares storage with b, so the
       + * order of the reads of b and writes of c below presumably matters --
       + * do not reorder without checking.
       + */
       +static void
       +equiv(struct line *a, int n, struct line *b, int m, int *c)
       +{
       +        int i, j;
       +
       +        /* merge pass over the two sorted arrays */
       +        i = j = 1;
       +        while(i<=n && j<=m) {
       +                if(a[i].value < b[j].value)
       +                        a[i++].value = 0;
       +                else if(a[i].value == b[j].value)
       +                        a[i++].value = j;
       +                else
       +                        j++;
       +        }
       +        /* whatever is left in a has no partner in b */
       +        while(i <= n)
       +                a[i++].value = 0;
       +        /* sentinel value in b[m+1] for the look-ahead comparison below */
       +        b[m+1].value = 0;
       +        j = 0;
       +        while(++j <= m) {
       +                c[j] = -b[j].serial;
       +                while(b[j+1].value == b[j].value) {
       +                        j++;
       +                        c[j] = b[j].serial;
       +                }
       +        }
       +        c[j] = -1;
       +}
       +
       +/*
       + * Append a candidate (x, y, pred) to clist, growing the array by one
       + * entry, and return the index of the new entry.
       + */
       +static int
       +newcand(int x, int  y, int pred)
       +{
       +        int slot;
       +
       +        slot = clen;
       +        clist = REALLOC(clist, struct cand, (clen+1));
       +        clist[slot].x = x;
       +        clist[slot].y = y;
       +        clist[slot].pred = pred;
       +        clen = slot+1;
       +        return slot;
       +}
       +
       +/*
       + * Look up y among the candidates c[0..k], comparing against the y
       + * field of clist[c[l]].  Returns k+1 when y is greater than the y of
       + * c[k]; otherwise a binary search returns the index whose candidate
       + * has exactly y, or one more than the last index probed with a smaller
       + * y.  (Presumably the y values of c[0..k] are in increasing order;
       + * the search relies on that.)
       + */
       +static int
       +search(int *c, int k, int y)
       +{
       +        int i, j, l;
       +        int t;
       +
       +        if(clist[c[k]].y < y)        /*quick look for typical case*/
       +                return k+1;
       +        /* invariant: the answer lies in the half-open index range (i, j] */
       +        i = 0;
       +        j = k+1;
       +        while((l=(i+j)/2) > i) {
       +                t = clist[c[l]].y;
       +                if(t > y)
       +                        j = l;
       +                else if(t < y)
       +                        i = l;
       +                else
       +                        return l;
       +        }
       +        return l+1;
       +}
       +
       +/*
       + * Build the list of k-candidates described in the comment at the top
       + * of this file.  a[1..n] holds, for each line of file0, the index in b
       + * of its equivalence class (0 for none); b holds the classes as laid
       + * out by equiv (first member negated); c receives the candidate list,
       + * as indices into clist.  Returns k, the index of the last entry of c
       + * in use.
       + */
       +static int
       +stone(int *a, int n, int *b, int *c)
       +{
       +        int i, k,y;
       +        int j, l;
       +        int oldc, tc;
       +        int oldl;
       +
       +        k = 0;
       +        /* c[0] is a sentinel candidate with x = y = 0 */
       +        c[0] = newcand(0,0,0);
       +        for(i=1; i<=n; i++) {
       +                j = a[i];
       +                if(j==0)
       +                        continue;
       +                /* the first member of a class is stored negated */
       +                y = -b[j];
       +                oldl = 0;
       +                oldc = c[0];
       +                /*
       +                 * Walk the members of the class; each continue below
       +                 * jumps to the loop condition, which fetches the next
       +                 * member and stops at the first non-positive entry.
       +                 */
       +                do {
       +                        if(y <= clist[oldc].y)
       +                                continue;
       +                        l = search(c, k, y);
       +                        if(l!=oldl+1)
       +                                oldc = c[l-1];
       +                        if(l<=k) {
       +                                if(clist[c[l]].y <= y)
       +                                        continue;
       +                                /* replace candidate l, chaining it to the previous one */
       +                                tc = c[l];
       +                                c[l] = newcand(i,y,oldc);
       +                                oldc = tc;
       +                                oldl = l;
       +                        } else {
       +                                /* y is beyond every candidate: extend the list */
       +                                c[l] = newcand(i,y,oldc);
       +                                k++;
       +                                break;
       +                        }
       +                } while((y=b[++j]) > 0);
       +        }
       +        return k;
       +}
       +
       +/*
       + * Fill in the match vector J.  Lines in the common prefix map to
       + * themselves, lines in the common suffix map to the line at the same
       + * distance from the end of the second file, and every other entry
       + * starts at 0.  The chain of candidates starting at clist[p] is then
       + * followed through pred until a candidate with y == 0 is reached, and
       + * each pair on the way is recorded with the prefix length added back.
       + */
       +static void
       +unravel(int p)
       +{
       +        int i;
       +        struct cand *q;
       +
       +        for(i=0; i<=len[0]; i++) {
       +                if (i <= pref)
       +                        J[i] = i;
       +                else if (i > len[0]-suff)
       +                        J[i] = i+len[1]-len[0];
       +                else
       +                        J[i] = 0;
       +        }
       +        for(q=clist+p;q->y!=0;q=clist+q->pred)
       +                J[q->x+pref] = q->y+pref;
       +}
       +
       +/*
       + * Turn the match vector J into calls to change(), one per run of
       + * lines that is not matched in sequence.  The file is walked forwards,
       + * except in mode 'e' where it is walked from the end towards the
       + * beginning.  Any changes queued by change() are printed at the end by
       + * flushchanges().
       + */
       +static void
       +output(void)
       +{
       +        int m, i0, i1, j0, j1;
       +
       +        m = len[0];
       +        /* sentinels: position 0 pairs with 0, position m+1 with len[1]+1 */
       +        J[0] = 0;
       +        J[m+1] = len[1]+1;
       +        if (mode != 'e') {
       +                for (i0 = 1; i0 <= m; i0 = i1+1) {
       +                        /* skip lines that continue the current run of matches */
       +                        while (i0 <= m && J[i0] == J[i0-1]+1)
       +                                i0++;
       +                        j0 = J[i0-1]+1;
       +                        /* extend i1 over the unmatched lines that follow */
       +                        i1 = i0-1;
       +                        while (i1 < m && J[i1+1] == 0)
       +                                i1++;
       +                        j1 = J[i1+1]-1;
       +                        /* presumably so the next iteration's J[i0-1]+1 test resumes after this change */
       +                        J[i1] = j1;
       +                        change(i0, i1, j0, j1);
       +                }
       +        }
       +        else {
       +                /* same scan, mirrored: from the last line back to the first */
       +                for (i0 = m; i0 >= 1; i0 = i1-1) {
       +                        while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0])
       +                                i0--;
       +                        j0 = J[i0+1]-1;
       +                        i1 = i0+1;
       +                        while (i1 > 1 && J[i1-1] == 0)
       +                                i1--;
       +                        j1 = J[i1-1]+1;
       +                        J[i1] = j1;
       +                        change(i1 , i0, j1, j0);
       +                }
       +        }
       +        /* empty first file: neither loop ran, so report the whole second file */
       +        if (m == 0)
       +                change(1, 0, 1, len[1]);
       +        flushchanges();
       +}
       +
       +#define BUF 4096
       +/*
       + * Compare the files behind b1 and b2 byte for byte, reading through
       + * their file descriptors from offset 0 rather than through the Biobuf
       + * buffers.  Returns 0 if the contents are identical and 1 otherwise.
       + *
       + * Each side keeps a window [bNs, bNe) of bytes read but not yet
       + * compared; a window is refilled only once it is empty, so the two
       + * sides may be read in chunks of different sizes.  A failed read
       + * leaves its window with negative length, which ends the loop.
       + */
       +static int
       +cmp(Biobuf* b1, Biobuf* b2)
       +{
       +        int n;
       +        uchar buf1[BUF], buf2[BUF];
       +        int f1, f2;
       +        uchar *b1s, *b1e, *b2s, *b2e;
       +
       +        f1 = Bfildes(b1);
       +        f2 = Bfildes(b2);
       +        seek(f1, 0, 0);
       +        seek(f2, 0, 0);
       +        b1s = b1e = buf1;
       +        b2s = b2e = buf2;
       +        for(;;){
       +                if(b1s >= b1e){
       +                        if(b1s >= &buf1[BUF])
       +                                b1s = buf1;
       +                        n = read(f1, b1s,  &buf1[BUF] - b1s);
       +                        b1e = b1s + n;
       +                }
       +                if(b2s >= b2e){
       +                        if(b2s >= &buf2[BUF])
       +                                b2s = buf2;
       +                        n = read(f2, b2s,  &buf2[BUF] - b2s);
       +                        b2e = b2s + n;
       +                }
       +                /* compare as many bytes as both windows currently hold */
       +                n = b2e - b2s;
       +                if(n > b1e - b1s)
       +                        n = b1e - b1s;
       +                if(n <= 0)
       +                        break;
       +                if(memcmp((void *)b1s, (void *)b2s, n) != 0)
       +                        return 1;
       +                b1s += n;
       +                b2s += n;
       +        }
       +        /* equal only if both sides ran out at the same point */
       +        if(b1e - b1s == b2e - b2s)
       +                return 0;
       +        return 1;
       +}
       +
       +/*
       + * Compare two regular files f and t and print their differences.
       + *
       + * Binary files are only compared for equality.  For text files the
       + * line hashes gathered by prepare are pruned, sorted and matched
       + * (equiv, stone, unravel) to produce the match vector J, which check
       + * verifies against the real text and output turns into a report.
       + *
       + * file[0] and file[1] are cleared before the inputs are prepared, so
       + * that the cleanup paths below only ever free line tables allocated
       + * by this call: prepare leaves file[i] untouched when it returns early
       + * for a binary file, and a previous call leaves both entries pointing
       + * at storage that has already been released.
       + */
       +void
       +diffreg(char *f, char *t)
       +{
       +        Biobuf *b0, *b1;
       +        int k;
       +
       +        binary = 0;
       +        file[0] = file[1] = 0;
       +        b0 = prepare(0, f);
       +        if (!b0)
       +                return;
       +        b1 = prepare(1, t);
       +        if (!b1) {
       +                if (file[0])
       +                        FREE(file[0]);
       +                file[0] = 0;
       +                Bterm(b0);
       +                return;
       +        }
       +        if (binary){
       +                /* could use b0 and b1 but this is simpler. */
       +                if (cmp(b0, b1))
       +                        print("binary files %s %s differ\n", f, t);
       +                /* the first file may have been hashed before the second proved binary */
       +                if (file[0])
       +                        FREE(file[0]);
       +                if (file[1])
       +                        FREE(file[1]);
       +                file[0] = file[1] = 0;
       +                Bterm(b0);
       +                Bterm(b1);
       +                return;
       +        }
       +        clen = 0;
       +        prune();
       +        sort(sfile[0], slen[0]);
       +        sort(sfile[1], slen[1]);
       +
       +        /* member reuses the storage of file[1], then is shrunk to fit */
       +        member = (int *)file[1];
       +        equiv(sfile[0], slen[0], sfile[1], slen[1], member);
       +        member = REALLOC(member, int, slen[1]+2);
       +
       +        /* class reuses the storage of file[0] in the same way */
       +        class = (int *)file[0];
       +        unsort(sfile[0], slen[0], class);
       +        class = REALLOC(class, int, slen[0]+2);
       +
       +        klist = MALLOC(int, slen[0]+2);
       +        clist = MALLOC(struct cand, 1);
       +        k = stone(class, slen[0], member, klist);
       +        FREE(member);
       +        FREE(class);
       +
       +        J = MALLOC(int, len[0]+2);
       +        unravel(klist[k]);
       +        FREE(clist);
       +        FREE(klist);
       +
       +        ixold = MALLOC(long, len[0]+2);
       +        ixnew = MALLOC(long, len[1]+2);
       +        Bseek(b0, 0, 0); Bseek(b1, 0, 0);
       +        check(b0, b1);
       +        output();
       +        FREE(J); FREE(ixold); FREE(ixnew);
       +        Bterm(b0); Bterm(b1);                        /* ++++ */
       +}
   DIR diff --git a/diff/main.c b/diff/main.c
       @@ -0,0 +1,270 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include "diff.h"
       +
       +#define        DIRECTORY(s)                ((s)->qid.type&QTDIR)
       +#define        REGULAR_FILE(s)                ((s)->type == 'M' && !DIRECTORY(s))
       +
       +Biobuf        stdout;
       +
       +/*
       + * Name templates for the temporary copies of non-regular inputs.
       + * mkstemp overwrites the template it is given, so these must be
       + * writable arrays rather than pointers to string literals.
       + */
       +static char tmp[][sizeof "/tmp/diff1XXXXXXXXXXX"] = {
       +        "/tmp/diff1XXXXXXXXXXX",
       +        "/tmp/diff2XXXXXXXXXXX"
       +};
       +static int whichtmp;                /* number of entries of tmp handed out so far */
       +static char *progname;
       +static char usage[] = "diff [ -acefmnbwr ] file1 ... file2\n";
       +
       +/*
       + * Remove every temporary file handed out so far, most recent first,
       + * leaving whichtmp at 0.
       + */
       +static void
       +rmtmpfiles(void)
       +{
       +        while (whichtmp > 0)
       +                remove(tmp[--whichtmp]);
       +}
       +
       +/*
       + * Remove the temporary files and exit.  Status 0 exits with an empty
       + * message, status 1 with "some" and any other status with "error".
       + */
       +void
       +done(int status)
       +{
       +        rmtmpfiles();
       +        if (status == 0)
       +                exits("");
       +        if (status == 1)
       +                exits("some");
       +        exits("error");
       +        /*NOTREACHED*/
       +}
       +
       +/*
       + * Flush pending output, then print "progname: " followed by the
       + * formatted message on file descriptor 2.  A non-zero status ends the
       + * program through done(status); with status 0 the function returns to
       + * its caller.
       + */
       +void
       +panic(int status, char *fmt, ...)
       +{
       +        va_list ap;
       +
       +        Bflush(&stdout);
       +
       +        fprint(2, "%s: ", progname);
       +        va_start(ap, fmt);
       +        vfprint(2, fmt, ap);
       +        va_end(ap);
       +        if (status == 0)
       +                return;
       +        done(status);
       +        /*NOTREACHED*/
       +}
       +
       +/*
       + * Note handler installed by mktmpfile: report the note and exit with
       + * status 2 (which also removes the temporary files).  The note text is
       + * printed through "%s" so that any '%' it contains is not interpreted
       + * as a format directive.
       + */
       +static int
       +catch(void *a, char *msg)
       +{
       +        USED(a);
       +        panic(2, "%s", msg);
       +        return 1;
       +}
       +
       +/*
       + * Store "path/name" in pathname.  Returns 0 on success; returns 1,
       + * after printing a message, when the result would be longer than
       + * MAXPATHLEN characters.  The callers in this file provide buffers of
       + * MAXPATHLEN+1 bytes, so the check has to count the '/' separator as
       + * well as the two components.
       + */
       +int
       +mkpathname(char *pathname, char *path, char *name)
       +{
       +        if (strlen(path) + 1 + strlen(name) > MAXPATHLEN) {
       +                panic(0, "pathname %s/%s too long\n", path, name);
       +                return 1;
       +        }
       +        sprint(pathname, "%s/%s", path, name);
       +        return 0;
       +}
       +        
       +/*
       + * Copy everything readable from descriptor input into a new temporary
       + * file made from the next template in tmp[].  On success the name of
       + * the temporary file is returned and *sb is set to its Dir (to be
       + * freed by the caller).  On any failure a message is printed, 0 is
       + * returned and *sb is left untouched, so nothing is allocated that
       + * the caller would have to release.
       + *
       + * The template slot is consumed even when creation fails; rmtmpfiles
       + * later calls remove on it regardless.
       + */
       +static char *
       +mktmpfile(int input, Dir **sb)
       +{
       +        int fd, i;
       +        char *p;
       +        char buf[8192];
       +        Dir *d;
       +
       +        /* make sure the temporary files are removed if a note arrives */
       +        atnotify(catch, 1);
       +        fd = mkstemp(p=tmp[whichtmp++]);
       +        if (fd < 0) {
       +                panic(mflag ? 0: 2, "cannot create %s: %r\n", p);
       +                return 0;
       +        }
       +        while ((i = read(input, buf, sizeof(buf))) > 0) {
       +                if ((i = write(fd, buf, i)) < 0)
       +                        break;
       +        }
       +        if (i < 0) {
       +                close(fd);
       +                panic(mflag ? 0: 2, "cannot read/write %s: %r\n", p);
       +                return 0;
       +        }
       +        d = dirfstat(fd);
       +        close(fd);
       +        if (d == nil) {
       +                /* callers dereference *sb, so a missing Dir is a failure */
       +                panic(mflag ? 0: 2, "cannot stat %s: %r\n", p);
       +                return 0;
       +        }
       +        *sb = d;
       +        return p;
       +}
       +
       +/*
       + * Work out what to read for the argument file and describe it in *sb.
       + *
       + * Regular files and directories are used as they are.  The name "-"
       + * (when it cannot be stat'ed as a file) stands for descriptor 0, and
       + * anything else that is neither a regular file nor a directory is
       + * opened; in both cases the contents are copied to a temporary file
       + * and that file's name is returned instead.  Returns 0 after printing
       + * a message when the file cannot be stat'ed, opened or copied.
       + */
       +static char *
       +statfile(char *file, Dir **sb)
       +{
       +        Dir *dir;
       +        int input;
       +
       +        dir = dirstat(file);
       +        if(dir == nil) {
       +                if (strcmp(file, "-") != 0 || (dir = dirfstat(0)) == nil) {
       +                        panic(mflag ? 0: 2, "cannot stat %s: %r\n", file);
       +                        return 0;
       +                }
       +                free(dir);
       +                return mktmpfile(0, sb);
       +        }
       +        if (REGULAR_FILE(dir) || DIRECTORY(dir)) {
       +                *sb = dir;
       +                return file;
       +        }
       +        free(dir);
       +        input = open(file, OREAD);
       +        if (input == -1) {
       +                panic(mflag ? 0: 2, "cannot open %s: %r\n", file);
       +                return 0;
       +        }
       +        file = mktmpfile(input, sb);
       +        close(input);
       +        return file;
       +}
       +
       +/*
       + * Compare f with t.  Two directories are compared with diffdir when
       + * rflag is set or level is 0, and merely mentioned otherwise; two
       + * regular files are compared with diffreg.  When exactly one side is
       + * a regular file, it is compared with the file of the same base name
       + * under the other side.  Temporary files created along the way are
       + * removed before returning.
       + */
       +void
       +diff(char *f, char *t, int level)
       +{
       +        char *fp, *tp, *base, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
       +        Dir *fsb, *tsb;
       +
       +        fp = statfile(f, &fsb);
       +        if (fp == 0)
       +                goto Return;
       +        tp = statfile(t, &tsb);
       +        if (tp == 0){
       +                free(fsb);
       +                goto Return;
       +        }
       +        if (DIRECTORY(fsb) && DIRECTORY(tsb)) {
       +                if (!rflag && level != 0)
       +                        Bprint(&stdout, "Common subdirectories: %s and %s\n",
       +                                fp, tp);
       +                else
       +                        diffdir(fp, tp, level);
       +        }
       +        else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb))
       +                diffreg(fp, tp);
       +        else if (REGULAR_FILE(fsb)) {
       +                /* f is a file: look for its base name under t */
       +                base = utfrrune(f, '/');
       +                base = base ? base+1 : f;
       +                if (mkpathname(tb, tp, base) == 0)
       +                        diffreg(fp, tb);
       +        }
       +        else {
       +                /* otherwise look for the base name of t under f */
       +                base = utfrrune(t, '/');
       +                base = base ? base+1 : t;
       +                if (mkpathname(fb, fp, base) == 0)
       +                        diffreg(fb, tp);
       +        }
       +        free(fsb);
       +        free(tsb);
       +Return:
       +        rmtmpfiles();
       +}
       +
       +/*
       + * Entry point of diff: parse the single-letter flags, check the operands
       + * and call diff() once for every operand against the last one.
       + * With more than two operands the last one has to be a directory.
       + */
       +void
       +main(int argc, char *argv[])
       +{
       +        char *p;
       +        int i;
       +        Dir *fsb, *tsb;
       +        extern int _p9usepwlibrary;
       +        
       +        _p9usepwlibrary = 0;
       +        Binit(&stdout, 1, OWRITE);
       +        progname = *argv;
       +        /* consume leading "-xyz" arguments; a lone "-" is left as an operand */
       +        while (--argc && (*++argv)[0] == '-' && (*argv)[1]) {
       +                for (p = *argv+1; *p; p++) {
       +                        switch (*p) {
       +
       +                        /* these letters are stored verbatim in mode */
       +                        case 'e':
       +                        case 'f':
       +                        case 'n':
       +                        case 'c':
       +                        case 'a':
       +                                mode = *p;
       +                                break;
       +
       +                        case 'w':
       +                                bflag = 2;
       +                                break;
       +
       +                        case 'b':
       +                                bflag = 1;
       +                                break;
       +
       +                        /* -r also turns on mflag */
       +                        case 'r':
       +                                rflag = 1;
       +                                mflag = 1;
       +                                break;
       +
       +                        case 'm':
       +                                mflag = 1;        
       +                                break;
       +
       +                        /* -h and unknown letters: usage is passed to panic() with progname set to "Usage" */
       +                        case 'h':
       +                        default:
       +                                progname = "Usage";
       +                                panic(2, usage);
       +                        }
       +                }
       +        }
       +        /* argc now counts the remaining operands, argv[0] being the first */
       +        if (argc < 2)
       +                panic(2, usage, progname);
       +        if ((tsb = dirstat(argv[argc-1])) == nil)
       +                panic(2, "can't stat %s\n", argv[argc-1]);
       +        if (argc > 2) {
       +                /* several sources are only accepted with a directory target */
       +                if (!DIRECTORY(tsb))
       +                        panic(2, usage, progname);
       +                mflag = 1;
       +        }
       +        else {
       +                if ((fsb = dirstat(argv[0])) == nil)
       +                        panic(2, "can't stat %s\n", argv[0]);
       +                /* directory against directory also sets mflag */
       +                if (DIRECTORY(fsb) && DIRECTORY(tsb))
       +                        mflag = 1;
       +                free(fsb);
       +        }
       +        free(tsb);
       +        for (i = 0; i < argc-1; i++)
       +                diff(argv[i], argv[argc-1], 0);
       +        done(anychange);
       +        /*NOTREACHED*/
       +}
       +
       +static char noroom[] = "out of memory - try diff -h\n";
       +
       +/*
       + * malloc() wrapper: returns n freshly allocated bytes, or reports
       + * noroom through panic(2, ...) when malloc() yields a null pointer.
       + */
       +void *
       +emalloc(unsigned n)
       +{
       +        void *mem;
       +
       +        mem = malloc(n);
       +        if (mem == 0)
       +                panic(2, noroom);
       +        return mem;
       +}
       +
       +/*
       + * realloc() wrapper: resizes p to n bytes and returns the new block, or
       + * reports noroom through panic(2, ...) when realloc() yields a null pointer.
       + */
       +void *
       +erealloc(void *p, unsigned n)
       +{
       +        void *grown;
       +
       +        grown = realloc(p, n);
       +        if (grown == 0)
       +                panic(2, noroom);
       +        return grown;
       +}
   DIR diff --git a/join/Makefile b/join/Makefile
       @@ -0,0 +1,10 @@
       +# join - join unix port from plan9
       +# Depends on ../lib9
       +
       +TARG      = join
       +
       +include ../std.mk
       +
       +pre-uninstall:
       +
       +post-install:
   DIR diff --git a/join/join.1 b/join/join.1
       @@ -0,0 +1,147 @@
       +.TH JOIN 1
       +.CT 1 files
       +.SH NAME
       +join \- relational database operator
       +.SH SYNOPSIS
       +.B join
       +[
       +.I options
       +]
       +.I file1 file2
       +.SH DESCRIPTION
       +.I Join
       +forms, on the standard output,
       +a join
       +of the two relations specified by the lines of
       +.I file1
       +and
       +.IR file2 .
       +If one of the file names is
       +.LR - ,
       +the standard input is used.
       +.PP
       +.I File1
       +and
       +.I file2
       +must be sorted in increasing
       +.SM ASCII 
       +collating
       +sequence on the fields
       +on which they are to be joined,
       +normally the first in each line.
       +.PP
       +There is one line in the output
       +for each pair of lines in
       +.I file1
       +and
       +.I file2
       +that have identical join fields.
       +The output line normally consists of the common field,
       +then the rest of the line from
       +.IR file1 ,
       +then the rest of the line from
       +.IR file2 .
       +.PP
       +Input fields are normally separated by spaces or tabs;
       +output fields by space.
       +In this case, multiple separators count as one, and
       +leading separators are discarded.
       +.PP
       +The following options are recognized, with POSIX syntax.
       +.TP
       +.BI -a " n
       +In addition to the normal output,
       +produce a line for each unpairable line in file
       +.IR n ,
       +where
       +.I n
       +is 1 or 2.
       +.TP
       +.BI -v " n
       +Like
       +.BR -a ,
       +omitting output for paired lines.
       +.TP
       +.BI -e " s
       +Replace empty output fields by string
       +.IR s .
       +.TP
       +.BI -1 " m
       +.br
       +.ns
       +.TP
       +.BI -2 " m
       +Join on the
       +.IR m th
       +field of
       +.I file1
       +or
       +.IR file2 .
       +.TP
       +.BI -j "n m"
       +Archaic equivalent for
       +.BI - n " m"\f1.
       +.TP
       +.BI -o fields
       +Each output line comprises the designated fields.
       +The comma-separated field designators are either
       +.BR 0 ,
       +meaning the join field, or have the form
       +.IR n . m ,
       +where
       +.I n
       +is a file number and
       +.I m
       +is a field number.
       +Archaic usage allows separate arguments for field designators.
       +.PP
       +.TP
       +.BI -t c
       +Use character
       +.I c
       +as the only separator (tab character) on input and output.
       +Every appearance of
       +.I c
       +in a line is significant.
       +.SH EXAMPLES
       +.TP
       +.L
       +sort /etc/passwd | join -t: -1 1 -a 1 -e "" - bdays
       +Add birthdays to the
       +.B /etc/passwd
       +file, leaving unknown
       +birthdays empty.
       +The layout of 
       +.B /adm/users
       +is given in
       +.IR passwd (5);
       +.B bdays
       +contains sorted lines like
       +.LR "ken:Feb\ 4,\ 1953" .
       +.TP
       +.L
       +tr : ' ' </etc/passwd | sort -k 3 3 >temp
       +.br
       +.ns
       +.TP
       +.L
       +join -1 3 -2 3 -o 1.1,2.1 temp temp | awk '$1 < $2'
       +Print all pairs of users with identical userids.
       +.SH SOURCE
       +.B \*9/src/cmd/join.c
       +.SH "SEE ALSO"
       +.IR sort (1), 
       +.IR comm (1), 
       +.IR awk (1)
       +.SH BUGS
       +With default field separation,
       +the collating sequence is that of
       +.BI "sort -b"
       +.BI -k y , y\f1;
       +with
       +.BR -t ,
       +the sequence is that of
       +.BI "sort -t" x
       +.BI -k y , y\f1.
       +.PP
       +One of the files must be randomly accessible.
   DIR diff --git a/join/join.c b/join/join.c
       @@ -0,0 +1,369 @@
       +/*        join F1 F2 on stuff */
       +#include <u.h>
       +#include <libc.h>
       +#include <stdio.h>
       +#include <ctype.h>
       +/* file selectors: F1 and F2 index f[], buf[] and ppi[] */
       +#define F1 0
       +#define F2 1
       +/* marker stored in olistf[] for a "0" (join field) entry of the -o list; not an array index */
       +#define F0 3
       +#define        NFLD        100        /* max field per line */
       +/* compare the join fields of the current line of each file */
       +#define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
       +FILE *f[2];
       +Rune buf[2][BUFSIZ];        /*input lines */
       +Rune *ppi[2][NFLD+1];        /* pointers to fields in lines */
       +Rune *s1,*s2;
       +/* rename j1/j2 -- presumably to keep clear of the libm functions of the same name; confirm */
       +#define j1 joinj1
       +#define j2 joinj2
       +
       +int        j1        = 1;        /* join of this field of file 1 */
       +int        j2        = 1;        /* join of this field of file 2 */
       +int        olist[2*NFLD];        /* output these fields */
       +int        olistf[2*NFLD];        /* from these files */
       +int        no;                /* number of entries in olist */
       +Rune        sep1        = ' ';        /* default field separator */
       +Rune        sep2        = '\t';
       +char *sepstr=" ";        /* separator printed between output fields; replaced by -t */
       +int        discard;        /* count of truncated lines */
       +Rune        null[BUFSIZ]/*        = L""*/;        /* text printed for empty output fields; set by -e */
       +int        a1;        /* nonzero when -a 1 was given */
       +int         a2;        /* nonzero when -a 2 was given */
       +
       +char *getoptarg(int*, char***);
       +void output(int, int);
       +int input(int);
       +void oparse(char*);
       +void error(char*, char*);
       +void seek1(void), seek2(void);
       +Rune *strtorune(Rune *, char *);
       +
       +
       +/*
       + * Entry point of join: parse the options, validate the requested field
       + * numbers, open file1 and file2 ("-" selects standard input) and run the
       + * merge with whichever input answers ftell() (file2 is preferred).
       + *
       + * Field numbers are checked here, before any indexing, because they are
       + * later used unchecked as indexes into ppi[][]: join fields must lie in
       + * 1..NFLD and -o field numbers must be at least 1.  The -e string is
       + * length-checked because it is copied into the fixed-size null[] array.
       + */
       +void
       +main(int argc, char **argv)
       +{
       +        int i;
       +        char *arg;
       +
       +        while (argc > 1 && argv[1][0] == '-') {
       +                if (argv[1][1] == '\0')
       +                        break;        /* a lone "-" is a file operand */
       +                switch (argv[1][1]) {
       +                case '-':
       +                        argc--;
       +                        argv++;
       +                        goto proceed;
       +                case 'a':
       +                        switch(*getoptarg(&argc, &argv)) {
       +                        case '1':
       +                                a1++;
       +                                break;
       +                        case '2':
       +                                a2++;
       +                                break;
       +                        default:
       +                                error("incomplete option -a","");
       +                        }
       +                        break;
       +                case 'e':
       +                        arg = getoptarg(&argc, &argv);
       +                        /* null[] holds BUFSIZ runes and every rune takes at least one byte */
       +                        if (strlen(arg) >= BUFSIZ)
       +                                error("replacement string too long in -e","");
       +                        strtorune(null, arg);
       +                        break;
       +                case 't':
       +                        sepstr=getoptarg(&argc, &argv);
       +                        chartorune(&sep1, sepstr);
       +                        sep2 = sep1;
       +                        break;
       +                case 'o':
       +                        /* attached or comma-separated list: parse it in one go */
       +                        if(argv[1][2]!=0 ||
       +                           argc>2 && strchr(argv[2],',')!=0)
       +                                oparse(getoptarg(&argc, &argv));
       +                        /* archaic form: one designator per following argument */
       +                        else for (no = 0; no<2*NFLD && argc>2; no++){
       +                                if (argv[2][0] == '1' && argv[2][1] == '.') {
       +                                        olistf[no] = F1;
       +                                        olist[no] = atoi(&argv[2][2]);
       +                                } else if (argv[2][0] == '2' && argv[2][1] == '.') {
       +                                        olist[no] = atoi(&argv[2][2]);
       +                                        olistf[no] = F2;
       +                                } else if (argv[2][0] == '0')
       +                                        olistf[no] = F0;
       +                                else
       +                                        break;
       +                                argc--;
       +                                argv++;
       +                        }
       +                        break;
       +                case 'j':
       +                        if(argc <= 2)
       +                                break;
       +                        if (argv[1][2] == '1')
       +                                j1 = atoi(argv[2]);
       +                        else if (argv[1][2] == '2')
       +                                j2 = atoi(argv[2]);
       +                        else
       +                                j1 = j2 = atoi(argv[2]);
       +                        argc--;
       +                        argv++;
       +                        break;
       +                case '1':
       +                        j1 = atoi(getoptarg(&argc, &argv));
       +                        break;
       +                case '2':
       +                        j2 = atoi(getoptarg(&argc, &argv));
       +                        break;
       +                }
       +                argc--;
       +                argv++;
       +        }
       +proceed:
       +        for (i = 0; i < no; i++) {
       +                /* F0 entries carry no field number of their own */
       +                if (olistf[i] != F0 && olist[i] < 1)
       +                        error("field number too small in -o","");
       +                if (olist[i]-- > NFLD)        /* 0 origin */
       +                        error("field number too big in -o","");
       +        }
       +        if (argc != 3)
       +                error("usage: join [-1 x -2 y] [-o list] file1 file2","");
       +        /* reject join fields that would index outside ppi[][] */
       +        if (j1 < 1 || j1 > NFLD || j2 < 1 || j2 > NFLD)
       +                error("join field number out of range","");
       +        j1--;
       +        j2--;        /* everyone else believes in 0 origin */
       +        s1 = ppi[F1][j1];
       +        s2 = ppi[F2][j2];
       +        if (strcmp(argv[1], "-") == 0)
       +                f[F1] = stdin;
       +        else if ((f[F1] = fopen(argv[1], "r")) == 0)
       +                error("can't open %s", argv[1]);
       +        if(strcmp(argv[2], "-") == 0) {
       +                f[F2] = stdin;
       +        } else if ((f[F2] = fopen(argv[2], "r")) == 0)
       +                error("can't open %s", argv[2]);
       +
       +        /* the merge has to rewind one input, so pick one that reports an offset */
       +        if(ftell(f[F2]) >= 0)
       +                seek2();
       +        else if(ftell(f[F1]) >= 0)
       +                seek1();
       +        else
       +                error("neither file is randomly accessible","");
       +        if (discard)
       +                error("some input line was truncated", "");
       +        exits("");
       +}
       +/*
       + * Compare two NUL-terminated rune strings.
       + * Returns 0 when they are equal, -1 when a sorts before b at the first
       + * differing rune, and 1 otherwise.
       + */
       +int
       +runecmp(Rune *a, Rune *b)
       +{
       +        for(; *a == *b; a++, b++)
       +                if(*a == '\0')
       +                        return 0;
       +        return *a < *b ? -1 : 1;
       +}
       +/*
       + * Convert the NUL-terminated rune string r to UTF with runetochar(),
       + * storing the result and a terminating NUL in buf.  Returns buf.
       + * The caller must supply a buffer that is large enough.
       + */
       +char *
       +runetostr(char *buf, Rune *r)
       +{
       +        char *dst;
       +
       +        dst = buf;
       +        while(*r != 0){
       +                dst += runetochar(dst, r);
       +                r++;
       +        }
       +        *dst = '\0';
       +        return buf;
       +}
       +/*
       + * Decode the NUL-terminated UTF string s with chartorune(), storing one
       + * rune per character plus a terminating zero rune in buf.  Returns buf.
       + * The caller must supply a buffer that is large enough.
       + */
       +Rune *
       +strtorune(Rune *buf, char *s)
       +{
       +        Rune *dst;
       +
       +        dst = buf;
       +        while(*s != '\0'){
       +                s += chartorune(dst, s);
       +                dst++;
       +        }
       +        *dst = '\0';
       +        return buf;
       +}
       +/* lazy.  there ought to be a clean way to combine seek1 & seek2 */
       +#define get1() n1=input(F1)
       +#define get2() n2=input(F2)
       +/*
       + * Merge the two inputs, rewinding file 2 whenever a run of equal join
       + * fields has to be replayed against the next line of file 1.
       + *
       + * n1/n2 hold the field count of the current line of each file, 0 at EOF.
       + * bot2 is the file 2 offset at which the current line (the start of a
       + * matching run) begins; top2 is the offset just past the last matching
       + * line of that run.  Offsets are kept as long, the type ftell() returns
       + * and fseek() accepts, so they are not truncated on large files.
       + */
       +void
       +seek2(void)
       +{
       +        int n1, n2;
       +        long top2 = 0;
       +        long bot2 = ftell(f[F2]);
       +        get1();
       +        get2();
       +        while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
       +                if(n1>0 && n2>0 && comp()>0 || n1==0) {
       +                        /* file 2 line is unpairable: optionally print it, move on */
       +                        if(a2) output(0, n2);
       +                        bot2 = ftell(f[F2]);
       +                        get2();
       +                } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
       +                        /* file 1 line is unpairable */
       +                        if(a1) output(n1, 0);
       +                        get1();
       +                } else /*(n1>0 && n2>0 && comp()==0)*/ {
       +                        /* pair the file 1 line with the whole matching run of file 2 */
       +                        while(n2>0 && comp()==0) {
       +                                output(n1, n2);
       +                                top2 = ftell(f[F2]);
       +                                get2();
       +                        }
       +                        /* replay the run for each following file 1 line */
       +                        fseek(f[F2], bot2, 0);
       +                        get2();
       +                        get1();
       +                        for(;;) {
       +                                if(n1>0 && n2>0 && comp()==0) {
       +                                        output(n1, n2);
       +                                        get2();
       +                                } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
       +                                        fseek(f[F2], bot2, 0);
       +                                        get2();
       +                                        get1();
       +                                } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
       +                                        /* file 1 moved past the run: resume after it */
       +                                        fseek(f[F2], top2, 0);
       +                                        bot2 = top2;
       +                                        get2();
       +                                        break;
       +                                }
       +                        }
       +                }
       +        }
       +}
       +/*
       + * Mirror image of seek2(): merge the two inputs, rewinding file 1
       + * whenever a run of equal join fields has to be replayed against the next
       + * line of file 2.
       + *
       + * n1/n2 hold the field count of the current line of each file, 0 at EOF.
       + * bot1 is the file 1 offset at which the current line (the start of a
       + * matching run) begins; top1 is the offset just past the last matching
       + * line of that run.  Offsets are kept as long, the type ftell() returns.
       + *
       + * The run-collecting loop is bounded by n1, the file it advances.  At EOF
       + * input() leaves the previous line in place, so comp() keeps reporting a
       + * match; testing n2 there would never terminate once file 1 ran out
       + * inside a matching run.
       + */
       +void
       +seek1(void)
       +{
       +        int n1, n2;
       +        long top1 = 0;
       +        long bot1 = ftell(f[F1]);
       +        get1();
       +        get2();
       +        while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
       +                if(n1>0 && n2>0 && comp()>0 || n1==0) {
       +                        /* file 2 line is unpairable */
       +                        if(a2) output(0, n2);
       +                        get2();
       +                } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
       +                        /* file 1 line is unpairable: optionally print it, move on */
       +                        if(a1) output(n1, 0);
       +                        bot1 = ftell(f[F1]);
       +                        get1();
       +                } else /*(n1>0 && n2>0 && comp()==0)*/ {
       +                        /* pair the file 2 line with the whole matching run of file 1 */
       +                        while(n1>0 && comp()==0) {
       +                                output(n1, n2);
       +                                top1 = ftell(f[F1]);
       +                                get1();
       +                        }
       +                        /* replay the run for each following file 2 line */
       +                        fseek(f[F1], bot1, 0);
       +                        get2();
       +                        get1();
       +                        for(;;) {
       +                                if(n1>0 && n2>0 && comp()==0) {
       +                                        output(n1, n2);
       +                                        get1();
       +                                } else if(n1>0 && n2>0 && comp()>0 || n1==0) {
       +                                        fseek(f[F1], bot1, 0);
       +                                        get2();
       +                                        get1();
       +                                } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
       +                                        /* file 2 moved past the run: resume after it */
       +                                        fseek(f[F1], top1, 0);
       +                                        bot1 = top1;
       +                                        get1();
       +                                        break;
       +                                }
       +                        }
       +                }
       +        }
       +}
       +
       +/*
       + * Read the next line of file n into buf[n] (converted to runes) and split
       + * it in place: each field is NUL-terminated and its start is recorded in
       + * ppi[n][], which is closed with a null pointer.
       + * Returns the number of fields recorded, or 0 when fgets() reports end of
       + * input; in that case buf[n] and ppi[n] keep the previous line.
       + * discard is incremented whenever splitting stops on anything other than
       + * a newline (line without a trailing newline, or field limit reached).
       + */
       +int
       +input(int n)                /* get input line and split into fields */
       +{
       +        register int i, c;
       +        Rune *bp;
       +        Rune **pp;
       +        char line[BUFSIZ];
       +
       +        bp = buf[n];
       +        pp = ppi[n];
       +        if (fgets(line, BUFSIZ, f[n]) == 0)
       +                return(0);
       +        strtorune(bp, line);
       +        i = 0;
       +        do {
       +                i++;
       +                /* sep1 is only ' ' with default separators (-t replaces it) */
       +                if (sep1 == ' ')        /* strip multiples */
       +                        while ((c = *bp) == sep1 || c == sep2)
       +                                bp++;        /* skip blanks */
       +                *pp++ = bp;        /* record beginning */
       +                while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
       +                        bp++;
       +                /* c keeps the character that ended the field for the tests below */
       +                *bp++ = '\0';        /* mark end by overwriting blank */
       +        } while (c != '\n' && c != '\0' && i < NFLD-1);
       +        if (c != '\n')
       +                discard++;
       +
       +        *pp = 0;
       +        return(i);
       +}
       +
       +/*
       + * Print one output line.
       + * on1 and on2 are the field counts of the current line of file 1 and
       + * file 2; 0 means that file contributes nothing to this line.
       + *
       + * Without an -o list the join field is printed first, followed by the
       + * remaining fields of file 1 and then of file 2.  With an -o list each
       + * designated field is printed; a field that is absent or empty is
       + * replaced by null (the -e string).
       + *
       + * A "0" (F0) entry is resolved only against the join fields and falls
       + * back to null; F0 is a marker, not a row of ppi[][], so it must never be
       + * used as an index.  The conversion buffer is sized for the worst case of
       + * UTFmax bytes per rune.
       + */
       +void
       +output(int on1, int on2)        /* print items from olist */
       +{
       +        int i;
       +        Rune *temp;
       +        char out[UTFmax*BUFSIZ + 1];
       +
       +        if (no <= 0) {        /* default case */
       +                printf("%s", runetostr(out, on1? ppi[F1][j1]: ppi[F2][j2]));
       +                for (i = 0; i < on1; i++)
       +                        if (i != j1)
       +                                printf("%s%s", sepstr, runetostr(out, ppi[F1][i]));
       +                for (i = 0; i < on2; i++)
       +                        if (i != j2)
       +                                printf("%s%s", sepstr, runetostr(out, ppi[F2][i]));
       +                printf("\n");
       +        } else {
       +                for (i = 0; i < no; i++) {
       +                        if (olistf[i] == F0) {
       +                                /* join field: take it from whichever line has it */
       +                                if (on1 > j1)
       +                                        temp = ppi[F1][j1];
       +                                else if (on2 > j2)
       +                                        temp = ppi[F2][j2];
       +                                else
       +                                        temp = null;
       +                        } else if (olist[i] < 0 ||
       +                                   olistf[i]==F1 && on1<=olist[i] ||
       +                                   olistf[i]==F2 && on2<=olist[i])
       +                                temp = null;        /* field not present on this line */
       +                        else {
       +                                temp = ppi[olistf[i]][olist[i]];
       +                                if (*temp == 0)
       +                                        temp = null;
       +                        }
       +                        printf("%s", runetostr(out, temp));
       +                        if (i == no - 1)
       +                                printf("\n");
       +                        else
       +                                printf("%s", sepstr);
       +                }
       +        }
       +}
       +
       +/*
       + * Write "join: ", then s1 formatted with s2 as its single argument, then
       + * a newline to stderr, and call exits(s1).
       + * s1 is used as a printf format; callers pass string literals.
       + */
       +void
       +error(char *s1, char *s2)
       +{
       +        fputs("join: ", stderr);
       +        fprintf(stderr, s1, s2);
       +        fputc('\n', stderr);
       +        exits(s1);
       +}
       +
       +/*
       + * Return the argument of the option held in (*argvp)[1].
       + * If text follows the option letter ("-t:"), that text is the argument
       + * and nothing is consumed.  Otherwise the next command-line word is the
       + * argument: *argcp and *argvp are advanced by one to consume it.  A
       + * missing next word, or one starting with '-', is reported through
       + * error().
       + */
       +char *
       +getoptarg(int *argcp, char ***argvp)
       +{
       +        char **av = *argvp;
       +        int ac = *argcp;
       +
       +        if(av[1][2] != '\0')
       +                return av[1] + 2;
       +        if(!(ac > 2 && av[2][0] != '-'))
       +                error("incomplete option %s", av[1]);
       +        *argcp = ac - 1;
       +        *argvp = av + 1;
       +        return av[2];
       +}
       +
       +/*
       + * Parse a -o list such as "1.1,2.10,0" into olistf[] (file selector) and
       + * olist[] (1-origin field number), setting no to the number of entries.
       + * Each designator is either "0" (the join field) or "n.m" with n being 1
       + * or 2 and m a decimal field number; anything else is reported through
       + * error().  At most 2*NFLD entries are stored.
       + */
       +void
       +oparse(char *s)
       +{
       +        for (no = 0; no<2*NFLD && *s; no++, s++) {
       +                switch(*s) {
       +                case '0':
       +                        olistf[no] = F0;
       +                        break;
       +                case '1':
       +                case '2':
       +                        if(s[1] == '.' && isdigit((uchar)s[2])) {
       +                                olistf[no] = *s=='1'? F1: F2;
       +                                s += 2;
       +                                olist[no] = atoi(s);
       +                                /*
       +                                 * leave s on the last digit of the number so that
       +                                 * the remaining digits of a multi-digit field are
       +                                 * not read as further designators
       +                                 */
       +                                while(isdigit((uchar)s[1]))
       +                                        s++;
       +                                break;
       +                        } /* fall thru */
       +                default:
       +                        error("invalid -o list", "");
       +                }
       +                if(s[1] == ',')
       +                        s++;
       +        }
       +}
   DIR diff --git a/lib9/utf.h b/lib9/utf.h
       @@ -11,7 +11,8 @@ enum
                UTFmax                = 3,                /* maximum bytes per rune */
                Runesync        = 0x80,                /* cannot represent part of a UTF sequence (<) */
                Runeself        = 0x80,                /* rune and UTF sequences are the same (<) */
       -        Runeerror        = 0xFFFD                /* decoding error in UTF */
       +        Runeerror        = 0xFFFD,                /* decoding error in UTF */
       +        Runemax = 0x10FFFF        /* maximum rune value */
        };
        
        /* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */
   DIR diff --git a/look/Makefile b/look/Makefile
       @@ -0,0 +1,10 @@
       +# look - look unix port from plan9
       +# Depends on ../lib9
       +
       +TARG      = look
       +
       +include ../std.mk
       +
       +pre-uninstall:
       +
       +post-install:
   DIR diff --git a/look/look.1 b/look/look.1
       @@ -0,0 +1,85 @@
       +.TH LOOK 1
       +.SH NAME
       +look \- find lines in a sorted list
       +.SH SYNOPSIS
       +.B look
       +[
       +.BI -dfnixt c
       +]
       +[
       +.I string
       +]
       +[
       +.I file
       +]
       +.SH DESCRIPTION
       +.I Look
       +consults a sorted
       +.I file
       +and prints all lines that begin with
       +.IR string .
       +It uses binary search.
       +.PP
       +The following options are recognized.
       +Options
       +.B dfnt
       +affect comparisons as in
       +.IR  sort (1).
       +.TP
       +.B -i
       +Interactive.
       +There is no
       +.I string
       +argument; instead
       +.I look
       +takes lines from the standard input as strings to be looked up.
       +.TP
       +.B -x
       +Exact.
       +Print only lines of the file whose key matches
       +.I string
       +exactly.
       +.TP
       +.B  -d
       +`Directory' order:
       +only letters, digits,
       +tabs and blanks participate in comparisons.
       +.TP
       +.B  -f
       +Fold.
       +Upper case letters compare equal to lower case.
       +.TP
       +.B -n
       +Numeric comparison with initial string of digits, optional minus sign,
       +and optional decimal point.
       +.TP
       +.BR -t [ \f2c\f1 ]
       +Character
       +.I c
       +terminates the sort key in the
       +.IR file .
       +By default, tab terminates the key.  If
       +.I c
       +is missing the entire line comprises the key.
       +.PP
       +If no
       +.I file
       +is specified,
       +.B /lib/words
       +is assumed, with collating sequence
       +.BR df .
       +.SH FILES
       +.B /lib/words
       +.SH SOURCE
       +.B \*9/src/cmd/look.c
       +.SH "SEE ALSO"
       +.IR sort (1), 
       +.IR grep (1)
       +.SH DIAGNOSTICS
       +The exit status is
       +.RB `` "not found" ''
       +if no match is found, and
       +.RB `` "no dictionary" ''
       +if
       +.I file
       +or the default dictionary cannot be opened.
   DIR diff --git a/look/look.c b/look/look.c
       @@ -0,0 +1,349 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +        /* Macros for Rune support of ctype.h-like functions */
       +
       +#undef isupper
       +#undef islower
       +#undef isalpha
       +#undef isdigit
       +#undef isalnum
       +#undef isspace
       +#undef tolower
       +#define        isupper(r)        ('A' <= (r) && (r) <= 'Z')
       +#define        islower(r)        ('a' <= (r) && (r) <= 'z')
       +#define        isalpha(r)        (isupper(r) || islower(r))
       +#define        islatin1(r)        (0xC0 <= (r) && (r) <= 0xFF)
       +
       +#define        isdigit(r)        ('0' <= (r) && (r) <= '9')
       +
       +#define        isalnum(r)        (isalpha(r) || isdigit(r))
       +
       +#define        isspace(r)        ((r) == ' ' || (r) == '\t' \
       +                        || (0x0A <= (r) && (r) <= 0x0D))
       +
       +#define        tolower(r)        ((r)-'A'+'a')
       +
       +#define        sgn(v)                ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
       +
       +#define        WORDSIZ        4000
       +char        *filename = "#9/lib/words";
       +Biobuf        *dfile;
       +Biobuf        bout;
       +Biobuf        bin;
       +
       +int        fold;
       +int        direc;
       +int        exact;
       +int        iflag;
       +int        rev = 1;        /*-1 for reverse-ordered file, not implemented*/
       +int        (*compare)(Rune*, Rune*);
       +Rune        tab = '\t';
       +Rune        entry[WORDSIZ];
       +Rune        word[WORDSIZ];
       +Rune        key[50], orig[50];
       +Rune        latin_fold_tab[] =
       +{
       +/*        Table to fold latin 1 characters to ASCII equivalents
       +                        based at Rune value 0xc0
       +
       +         À    Á    Â    Ã    Ä    Å    Æ    Ç
       +         È    É    Ê    Ë    Ì    Í    Î    Ï
       +         Ð    Ñ    Ò    Ó    Ô    Õ    Ö    ×
       +         Ø    Ù    Ú    Û    Ü    Ý    Þ    ß
       +         à    á    â    ã    ä    å    æ    ç
       +         è    é    ê    ë    ì    í    î    ï
       +         ð    ñ    ò    ó    ô    õ    ö    ÷
       +         ø    ù    ú    û    ü    ý    þ    ÿ
       +*/
       +        'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
       +        'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
       +        'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
       +        'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 ,
       +        'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
       +        'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
       +        'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
       +        'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y',
       +};
       +
       +int        locate(void);
       +int        acomp(Rune*, Rune*);
       +int        getword(Biobuf*, Rune *rp, int n);
       +void        torune(char*, Rune*);
       +void        rcanon(Rune*, Rune*);
       +int        ncomp(Rune*, Rune*);
       +
       +/*
       + * Entry point of look: parse the flags, pick the search string and the
       + * dictionary file, then print the matching lines.
       + * Without -i the string is the first argument; when no string is given
       + * the program switches to interactive mode and reads strings from
       + * standard input.  When no file is given, the default filename is used
       + * and the d and f options are turned on.
       + */
       +void
       +main(int argc, char *argv[])
       +{
       +        int n;
       +
       +        filename = unsharp(filename);
       +
       +        Binit(&bin, 0, OREAD);
       +        Binit(&bout, 1, OWRITE);
       +        compare = acomp;
       +        ARGBEGIN{
       +        case 'd':
       +                direc++;
       +                break;
       +        case 'f':
       +                fold++;
       +                break;
       +        case 'i': 
       +                iflag++;
       +                break;
       +        case 'n':
       +                compare = ncomp;
       +                break;
       +        case 't':
       +                /* NOTE(review): ARGF() is passed on unchecked -- confirm what happens when -t has no argument */
       +                chartorune(&tab,ARGF());
       +                break;
       +        case 'x':
       +                exact++;
       +                break;
       +        default:
       +                fprint(2, "%s: bad option %c\n", argv0, ARGC());
       +                fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
       +                exits("usage");
       +        } ARGEND
       +        if(!iflag){
       +                if(argc >= 1) {
       +                        /* first operand is the string to look up */
       +                        torune(argv[0], orig);
       +                        argv++;
       +                        argc--;
       +                } else
       +                        iflag++;        /* no string given: go interactive */
       +        }
       +        if(argc < 1) {
       +                /* no file operand: keep the default filename, compare with d and f */
       +                direc++;
       +                fold++;
       +        } else 
       +                filename = argv[0];
       +        if (!iflag)
       +                rcanon(orig, key);
       +        dfile = Bopen(filename, OREAD);
       +        if(dfile == 0) {
       +                fprint(2, "look: can't open %s\n", filename);
       +                exits("no dictionary");
       +        }
       +        if(!iflag)
       +                if(!locate())
       +                        exits("not found");
       +        /* one pass for a command-line string, one pass per input line with -i */
       +        do {
       +                if(iflag) {
       +                        Bflush(&bout);
       +                        if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
       +                                exits(0);
       +                        rcanon(orig, key);
       +                        if(!locate())
       +                                continue;
       +                }
       +                /* entry/word are globals; presumably locate() left the first hit in them -- confirm */
       +                if (!exact || !acomp(word, key))
       +                        Bprint(&bout, "%S\n", entry);
       +                /* keep printing the following lines for as long as they still match */
       +                while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
       +                        rcanon(entry, word);
       +                        n = compare(key, word);
       +                        switch(n) {
       +                        case -1:
       +                                if(exact)
       +                                        break;
       +                                /* without -x, -1 is handled like 0: falls through */
       +                        case 0:
       +                                if (!exact || !acomp(word, orig))
       +                                        Bprint(&bout, "%S\n", entry);
       +                                continue;        /* next line of the while loop */
       +                        }
       +                        /* reached for any other result: stop scanning */
       +                        break;
       +                }
       +        } while(iflag);
       +        exits(0);
       +}
       +
       +/*
       + * Position dfile at the first dictionary line that can match key.
       + *
       + * Phase 1 is a binary search over byte offsets [bot, top]; phase 2 is
       + * a linear scan forward from bot.  Both phases read a line into entry,
       + * canonicalise it into word with rcanon() and call compare(key, word).
       + * NOTE(review): compare is defined outside this view; the result codes
       + * handled here (-2..2) match the table documented for acomp().
       + *
       + * Returns 1 when the line just read (left in entry/word) matches key
       + * (result 0, or result -1 when exact is not set), 0 when no such line
       + * exists.
       + */
       +int
       +locate(void)
       +{
       +        vlong top, bot, mid;
       +        int c;
       +        int n;
       +
       +        bot = 0;
       +        /* whence 2: presumably seek relative to end, so top = file size -- TODO confirm */
       +        top = Bseek(dfile, 0L, 2);
       +        for(;;) {
       +                mid = (top+bot) / 2;
       +                Bseek(dfile, mid, 0);
       +                /* mid probably lands inside a line: discard up to and including the next newline */
       +                do
       +                        c = Bgetrune(dfile);
       +                while(c>=0 && c!='\n');
       +                /* from here on mid is the offset of the line actually examined */
       +                mid = Boffset(dfile);
       +                if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
       +                        break;
       +                rcanon(entry, word);
       +                n = compare(key, word);
       +                switch(n) {
       +                case -2:
       +                case -1:
       +                case 0:
       +                        /* key <= this line: pull top down, or stop once the range cannot shrink */
       +                        if(top <= mid)
       +                                break;
       +                        top = mid;
       +                        continue;
       +                case 1:
       +                case 2:
       +                        /* key is after this line: raise bot to the start of this line */
       +                        bot = mid;
       +                        continue;
       +                }
       +                break;
       +        }
       +        /* bot is 0 or the start of a line that precedes key; scan forward line by line */
       +        Bseek(dfile, bot, 0);
       +        while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
       +                rcanon(entry, word);
       +                n = compare(key, word);
       +                switch(n) {
       +                case -2:
       +                        /* already past where key would be */
       +                        return 0;
       +                case -1:
       +                        /* key is only a prefix of this line: a hit unless exact matching is on */
       +                        if(exact)
       +                                return 0;
       +                        /* fall through */
       +                case 0:
       +                        return 1;
       +                case 1:
       +                case 2:
       +                        continue;
       +                }
       +        }
       +        return 0;
       +}
       +
       +/*
       + *        acomp(s, t) compares two zero-terminated rune strings and returns:
       + *                -2 if s sorts strictly before t
       + *                -1 if s is a proper prefix of t
       + *                 0 if s and t are identical
       + *                 1 if t is a proper prefix of s
       + *                 2 if t sorts strictly before s
       + */
       +
       +int
       +acomp(Rune *s, Rune *t)
       +{
       +        int a, b;
       +
       +        /* advance in step until the first position where the strings differ */
       +        for(; (a = *s) == (b = *t); s++, t++)
       +                if(a == 0)
       +                        return 0;
       +        /* one string ending first makes it a prefix of the other */
       +        if(a == 0)
       +                return -1;
       +        if(b == 0)
       +                return 1;
       +        return a < b ? -2 : 2;
       +}
       +
       +/*
       + * Decode the zero-terminated UTF string old into runes stored at new.
       + * The terminating zero rune is stored too.  The size of new is not
       + * checked here; the caller must supply a buffer that is large enough.
       + */
       +void
       +torune(char *old, Rune *new)
       +{
       +        for(;;) {
       +                old += chartorune(new, old);
       +                /* stop once the rune just stored was the terminator */
       +                if(*new++ == 0)
       +                        break;
       +        }
       +}
       +
       +/*
       + * Copy old to new in the canonical form used for comparisons.
       + * Copying stops at the end of old or at the first rune equal to tab.
       + * Runes for which islatin1() holds and latin_fold_tab has a non-zero
       + * entry are replaced by that entry.  With direc set, everything except
       + * alphanumerics, space and '\t' is dropped; with fold set, upper case
       + * is mapped to lower case.  new is always zero-terminated.
       + */
       +void
       +rcanon(Rune *old, Rune *new)
       +{
       +        Rune r;
       +
       +        for(;;) {
       +                r = *old++;
       +                if(r == 0 || r == tab)
       +                        break;
       +                if(islatin1(r) && latin_fold_tab[r-0xc0])
       +                        r = latin_fold_tab[r-0xc0];
       +                /* dictionary order: ignore anything that is not a letter, digit or blank */
       +                if(direc && !isalnum(r) && r != ' ' && r != '\t')
       +                        continue;
       +                if(fold && isupper(r))
       +                        r = tolower(r);
       +                *new++ = r;
       +        }
       +        *new = 0;
       +}
       +
       +/*
       + * Numeric comparison of two rune strings of the form
       + *        [blanks] ['-'] digits ['.' digits]
       + * without converting them to a machine number.
       + *
       + * Returns 0 when the numbers are equal, otherwise a non-zero value
       + * built from ssgn/tsgn, which start as -2*rev and are negated by a
       + * leading '-'.
       + * NOTE(review): rev and sgn() are defined outside this view; presumably
       + * rev is +1/-1 and sgn() yields the sign of its argument -- confirm.
       + */
       +int
       +ncomp(Rune *s, Rune *t)
       +{
       +        Rune *is, *it, *js, *jt;
       +        int a, b;
       +        int ssgn, tsgn;
       +
       +        /* skip leading white space in both operands */
       +        while(isspace(*s))
       +                s++;
       +        while(isspace(*t))
       +                t++;
       +        ssgn = tsgn = -2*rev;
       +        if(*s == '-') {
       +                s++;
       +                ssgn = -ssgn;
       +        }
       +        if(*t == '-') {
       +                t++;
       +                tsgn = -tsgn;
       +        }
       +        /* is/it end up just past the integer digits of s/t */
       +        for(is = s; isdigit(*is); is++)
       +                ;
       +        for(it = t; isdigit(*it); it++)
       +                ;
       +        /* remember where the integer parts end; the fraction is examined later */
       +        js = is;
       +        jt = it;
       +        a = 0;
       +        /*
       +         * Same sign: walk both integer parts from the least significant
       +         * digit upward.  a is overwritten at every differing position, so
       +         * it finishes holding the difference at the most significant
       +         * position the two digit runs have in common.
       +         */
       +        if(ssgn == tsgn)
       +                while(it>t && is>s)
       +                        if(b = *--it - *--is)
       +                                a = b;
       +        /* extra leading digits decide the result unless they are all '0' */
       +        while(is > s)
       +                if(*--is != '0')
       +                        return -ssgn;
       +        while(it > t)
       +                if(*--it != '0')
       +                        return tsgn;
       +        if(a)
       +                return sgn(a)*ssgn;
       +        /* integer parts are equal: step over an optional '.' and compare fractions */
       +        if(*(s=js) == '.')
       +                s++;
       +        if(*(t=jt) == '.')
       +                t++;
       +        /* fraction digits are compared left to right; the first difference decides */
       +        if(ssgn == tsgn)
       +                while(isdigit(*s) && isdigit(*t))
       +                        if(a = *t++ - *s++)
       +                                return sgn(a)*ssgn;
       +        /* leftover fraction digits only matter when they are not all '0' */
       +        while(isdigit(*s))
       +                if(*s++ != '0')
       +                        return -ssgn;
       +        while(isdigit(*t))
       +                if(*t++ != '0')
       +                        return tsgn;
       +        return 0;
       +}
       +
       +/*
       + * Read one line of runes from f into rp, which has room for n runes.
       + * The newline is replaced by a zero terminator.
       + *
       + * Returns 1 when a complete line was stored.  Returns 0 when
       + * Bgetrune() reports end of input or an error before a newline is
       + * seen, or (after printing a diagnostic) when n runes were read
       + * without finding a newline.
       + */
       +int
       +getword(Biobuf *f, Rune *rp, int n)
       +{
       +        long r;
       +        int i;
       +
       +        for(i = 0; i < n; i++) {
       +                r = Bgetrune(f);
       +                if(r < 0)
       +                        return 0;
       +                if(r == '\n') {
       +                        rp[i] = '\0';
       +                        return 1;
       +                }
       +                rp[i] = r;
       +        }
       +        fprint(2, "Look: word too long.  Bailing out.\n");
       +        return 0;
       +}
   DIR diff --git a/pbd/Makefile b/pbd/Makefile
       @@ -0,0 +1,10 @@
       +# pbd - pbd unix port from plan9
       +# Depends on ../lib9
       +
       +TARG      = pbd
       +
       +include ../std.mk
       +
       +pre-uninstall:
       +
       +post-install:
   DIR diff --git a/pbd/pbd.1 b/pbd/pbd.1
   DIR diff --git a/pbd/pbd.c b/pbd/pbd.c
       @@ -0,0 +1,19 @@
       +#include <u.h>
       +#include <libc.h>
       +
       +/*
       + * pbd: write the last path element of the working directory to
       + * standard output, without a trailing newline.  "???" is written when
       + * getwd() fails; a working directory of "/" is written as "/".
       + */
       +void
       +main(void)
       +{
       +        char cwd[512], *base, *slash;
       +
       +        base = "???";
       +        if(getwd(cwd, sizeof cwd)){
       +                slash = strrchr(cwd, '/');
       +                if(slash == nil)
       +                        base = cwd;
       +                else if(slash > cwd || slash[1] != '\0')
       +                        base = slash+1;
       +                else
       +                        base = slash;        /* cwd is exactly "/" */
       +        }
       +        write(1, base, strlen(base));
       +        exits(0);
       +}
   DIR diff --git a/rc/Makefile b/rc/Makefile
       @@ -46,7 +46,7 @@ uninstall:
                @${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c
        
        clean:
       -        rm -f ${OFILES} ${TARG} y.tab.c y.tab.h
       +        rm -f ${OFILES} ${TARG} y.tab.c y.tab.h x.tab.h
        
        ${TARG}: ${OFILES}
                @echo LD ${TARG}
   DIR diff --git a/split/Makefile b/split/Makefile
       @@ -0,0 +1,10 @@
       +# split - split unix port from plan9
       +# Depends on ../lib9
       +
       +TARG      = split
       +
       +include ../std.mk
       +
       +pre-uninstall:
       +
       +post-install:
   DIR diff --git a/split/split.1 b/split/split.1
       @@ -0,0 +1,82 @@
       +.TH SPLIT 1
       +.CT 1 files
       +.SH NAME
       +split \- split a file into pieces
       +.SH SYNOPSIS
       +.B split
       +[
       +.I option ...
       +]
       +[
       +.I file
       +]
       +.SH DESCRIPTION
       +.I Split
       +reads
       +.I file
       +(standard input by default)
       +and writes it in pieces of 1000
       +lines per output file.
       +The names of the
       +output files are
       +.BR xaa ,
       +.BR xab ,
       +and so on to
       +.BR xzz .
       +The options are
       +.TP
       +.BI -n " n"
       +Split into
       +.IR n -line
       +pieces.
       +.TP
       +.BI -l " n"
       +Synonym for
       +.B -n
       +.IR n ,
       +a nod to Unix's syntax.
       +.TP
       +.BI -e " expression"
       +File divisions occur at each line
       +that matches a regular
       +.IR expression ;
       +see 
       +.IR regexp (7).
       +Multiple
       +.B -e
       +options may appear.
       +If a subexpression of
       +.I expression
       +is contained in parentheses
       +.BR ( ... ) ,
       +the output file name is the portion of the
       +line which matches the subexpression.
       +.TP
       +.BI -f " stem"
       +Use
       +.I stem
       +instead of
       +.B x
       +in output file names.
       +.TP
       +.BI -s " suffix"
       +Append
       +.I suffix
       +to names identified under
       +.BR -e .
       +.TP
       +.B -x
       +Exclude the matched input line from the output file.
       +.TP
       +.B -i
       +Ignore case in option
       +.BR -e ;
       +force output file names (excluding the suffix)
       +to lower case.
       +.SH SOURCE
       +.B \*9/src/cmd/split.c
       +.SH SEE ALSO
       +.IR sed (1), 
       +.IR awk (1),
       +.IR grep (1),
       +.IR regexp (7)
   DIR diff --git a/split/split.c b/split/split.c
       @@ -0,0 +1,189 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include <ctype.h>
       +#include <regexp.h>
       +
       +char        digit[] = "0123456789";
       +char        *suffix = "";
       +char        *stem = "x";
       +char        suff[] = "aa";
       +char        name[200];
       +Biobuf        bout;
       +Biobuf        *output = &bout;
       +
       +extern int nextfile(void);
       +extern int matchfile(Resub*);
       +extern void openf(void);
       +extern char *fold(char*,int);
       +extern void usage(void);
       +extern void badexp(void);
       +
       +/*
       + * split: copy the input (argv[0], or standard input) into a sequence
       + * of output files.
       + *
       + * Without -e the input is cut every n lines (-n/-l, default 1000) and
       + * each piece goes to the next file supplied by nextfile().  With -e a
       + * new file is started by matchfile() at every line matching the
       + * expression; -x drops the matching line itself and -i matches
       + * case-insensitively by folding both the pattern and each line.
       + *
       + * The global output starts out pointing at bout, which is only
       + * initialised by openf().  The local flag opened therefore records
       + * whether a file has been opened, so that text seen before the first
       + * file division is directed to a real file instead of an
       + * uninitialised buffer.
       + */
       +void
       +main(int argc, char *argv[])
       +{
       +        Reprog *exp;
       +        char *pattern = 0;
       +        int n = 1000;
       +        char *line;
       +        int xflag = 0;
       +        int iflag = 0;
       +        int opened = 0;                /* non-zero once an output file has been opened */
       +        Biobuf bin;
       +        Biobuf *b = &bin;
       +        char buf[256];
       +
       +        ARGBEGIN {
       +        case 'l':
       +        case 'n':
       +                n=atoi(EARGF(usage()));
       +                break;
       +        case 'e':
       +                pattern = strdup(EARGF(usage()));
       +                break;
       +        case 'f':
       +                stem = strdup(EARGF(usage()));
       +                break;
       +        case 's':
       +                suffix = strdup(EARGF(usage()));
       +                break;
       +        case 'x':
       +                xflag++;
       +                break;
       +        case 'i':
       +                iflag++;
       +                break;
       +        default:
       +                usage();
       +                break;
       +
       +        } ARGEND;
       +
       +        if(argc < 0 || argc > 1)
       +                usage();
       +
       +        if(argc != 0) {
       +                b = Bopen(argv[0], OREAD);
       +                if(b == nil) {
       +                        fprint(2, "split: can't open %s: %r\n", argv[0]);
       +                        exits("open");
       +                }
       +        } else
       +                Binit(b, 0, OREAD);
       +
       +        if(pattern) {
       +                if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
       +                        badexp();
       +                while((line=Brdline(b,'\n')) != 0) {
       +                        Resub match[2];
       +                        memset(match, 0, sizeof match);
       +                        /* replace the newline so the line can be matched as a string */
       +                        line[Blinelen(b)-1] = 0;
       +                        if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) {
       +                                opened = 1;
       +                                if(matchfile(match) && xflag)
       +                                        continue;
       +                        } else if(!opened) {
       +                                /* text before the first match goes to the first default file */
       +                                nextfile();        /* at most once */
       +                                opened = 1;
       +                        }
       +                        Bwrite(output, line, Blinelen(b)-1);
       +                        Bputc(output, '\n');
       +                }
       +        } else {
       +                /* start "full" so the very first line opens the first file */
       +                int linecnt = n;
       +
       +                while((line=Brdline(b,'\n')) != 0) {
       +                        if(++linecnt > n) {
       +                                nextfile();
       +                                opened = 1;
       +                                linecnt = 1;
       +                        }
       +                        Bwrite(output, line, Blinelen(b));
       +                }
       +
       +                /*
       +                 * in case we didn't end with a newline, tack whatever's
       +                 * left onto the last file; if no file exists yet (the
       +                 * input held no complete line) open the first one
       +                 */
       +                while((n = Bread(b, buf, sizeof(buf))) > 0) {
       +                        if(!opened) {
       +                                nextfile();
       +                                opened = 1;
       +                        }
       +                        Bwrite(output, buf, n);
       +                }
       +        }
       +        if(b != nil)
       +                Bterm(b);
       +        exits(0);
       +}
       +
       +/*
       + * Open the next default output file, named stem followed by the
       + * current two-letter suffix ("aa", "ab", ... "zz"), and advance the
       + * suffix.
       + *
       + * Returns 1 while files can still be opened.  Once the suffixes are
       + * exhausted a diagnostic is printed (once), no new file is opened and
       + * 0 is returned from then on.  Exits if stem plus suffix does not fit
       + * in name.
       + */
       +int
       +nextfile(void)
       +{
       +        static int canopen = 1;
       +
       +        if(suff[0] > 'z') {
       +                if(canopen)
       +                        fprint(2, "split: file %szz not split\n",stem);
       +                canopen = 0;
       +        } else {
       +                /* stem comes from the command line; refuse to overrun name */
       +                if(strlen(stem) + strlen(suff) >= sizeof name) {
       +                        fprint(2, "split: stem %s too long\n", stem);
       +                        exits("stem too long");
       +                }
       +                strcpy(name, stem);
       +                strcat(name, suff);
       +                /* advance "aa" -> "ab" ... "az" -> "ba"; suff[0] passes 'z' after "zz" */
       +                if(++suff[1] > 'z')
       +                        suff[1] = 'a', ++suff[0];
       +                openf();
       +        }
       +        return canopen;
       +}
       +
       +/*
       + * Start a new output file for a line that matched the -e expression.
       + *
       + * When the expression had a parenthesised subexpression (match[1] is
       + * set) the file is named after the matched text followed by suffix,
       + * and 1 is returned.  Otherwise the next default file is opened and
       + * the result of nextfile() is returned.  Exits if the resulting name
       + * does not fit in name.
       + */
       +int
       +matchfile(Resub *match)
       +{
       +        if(match[1].s.sp) {
       +                int len = match[1].e.ep - match[1].s.sp;
       +
       +                /* the matched text comes from the input; refuse to overrun name */
       +                if(len < 0 || len + strlen(suffix) >= sizeof name) {
       +                        fprint(2, "split: output file name too long\n");
       +                        exits("name too long");
       +                }
       +                strncpy(name, match[1].s.sp, len);
       +                /* strncpy does not terminate here; strcpy of suffix supplies the NUL */
       +                strcpy(name+len, suffix);
       +                openf();
       +                return 1;
       +        }
       +        return nextfile();
       +}
       +
       +/*
       + * Switch output to a newly created file called name.
       + *
       + * Anything buffered for the previous file is flushed, the previous
       + * descriptor (if any) is closed, and output is re-initialised on the
       + * new descriptor.  Exits with status "create" when the file cannot be
       + * created.
       + */
       +void
       +openf(void)
       +{
       +        static int fd = 0;        /* descriptor of the current output file; 0 means none yet */
       +
       +        Bflush(output);
       +        Bterm(output);
       +        if(fd > 0)
       +                close(fd);
       +        fd = create(name,OWRITE,0666);
       +        if(fd < 0) {
       +                /* the original message named the wrong program ("grep") */
       +                fprint(2, "split: can't create %s: %r\n", name);
       +                exits("create");
       +        }
       +        Binit(output, fd, OWRITE);
       +}
       +
       +/*
       + * Return a lower-cased copy of the first n bytes of s (stopping early
       + * at a NUL), zero-terminated.
       + *
       + * The copy lives in a static buffer that grows as needed and is
       + * overwritten by the next call, so the caller must not keep the
       + * result across calls.  Exits if the buffer cannot be allocated.
       + */
       +char *
       +fold(char *s, int n)
       +{
       +        static char *fline;
       +        static int linesize = 0;
       +        char *t;
       +        int i;
       +
       +        if(linesize < n+1){
       +                t = realloc(fline,n+1);
       +                if(t == nil){
       +                        fprint(2, "split: out of memory\n");
       +                        exits("memory");
       +                }
       +                fline = t;
       +                linesize = n+1;
       +        }
       +        /*
       +         * we assume the 'A'-'Z' only appear as themselves
       +         * in a utf encoding.
       +         */
       +        for(i = 0; i < n && s[i] != '\0'; i++)
       +                fline[i] = tolower((uchar)s[i]);
       +        fline[i] = '\0';
       +        return fline;
       +}
       +
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n");
       +        exits("usage");
       +}
       +
       +/* Report that the -e expression failed to compile and exit. */
       +void
       +badexp(void)
       +{
       +        static char complaint[] = "split: bad regular expression\n";
       +
       +        fprint(2, "%s", complaint);
       +        exits("bad regular expression");
       +}
   DIR diff --git a/strings/Makefile b/strings/Makefile
       @@ -0,0 +1,10 @@
       +# strings - strings unix port from plan9
       +# Depends on ../lib9
       +
       +TARG      = strings
       +
       +include ../std.mk
       +
       +pre-uninstall:
       +
       +post-install:
   DIR diff --git a/strings/strings.1 b/strings/strings.1
       @@ -0,0 +1,28 @@
       +.TH STRINGS 1
       +.SH NAME
       +strings \- extract printable strings
       +.SH SYNOPSIS
       +.B strings
       +[
       +.I file ...
       +]
       +.SH DESCRIPTION
       +.I Strings
       +finds and prints strings containing 6 or more
       +consecutive printable UTF-encoded characters
       +in a (typically) binary file, default
       +standard input.
       +Printable characters are taken to be
       +.SM ASCII
       +characters from blank through tilde (hexadecimal 20 through 7E), inclusive,
       +and
       +all other characters from value 00A0 to FFFF.
       +Strings reports
       +the decimal offset within the file at which the string starts and the text
       +of the string. If the string is longer than 70 runes the line is
       +terminated by three dots and the printing is resumed on the next
       +line with the offset of the continuation line.
       +.SH SOURCE
       +.B \*9/src/cmd/strings.c
       +.SH SEE ALSO
       +.IR nm (1)
   DIR diff --git a/strings/strings.c b/strings/strings.c
       @@ -0,0 +1,90 @@
       +#include        <u.h>
       +#include         <libc.h>
       +#include        <bio.h>
       +
       +Biobuf        *fin;
       +Biobuf        fout;
       +
       +#define        MINSPAN                6                /* Min characters in string */
       +
       +#define BUFSIZE                70
       +
       +void stringit(char *);
       +#undef isprint
       +#define isprint risprint
       +int isprint(Rune);
       +
       +/*
       + * strings: print the printable strings found in each named file, or
       + * in /dev/stdin when no file is given.  When more than one file is
       + * named, each file's output is preceded by a "name:" header line.
       + */
       +void
       +main(int argc, char **argv)
       +{
       +        int i;
       +
       +        Binit(&fout, 1, OWRITE);
       +        if(argc < 2) {
       +                stringit("/dev/stdin");
       +                exits(0);
       +        }
       +
       +        for(i = 1; i < argc; i++) {
       +                /*
       +                 * The header must go through fout like the strings
       +                 * themselves; an unbuffered print() here would reach
       +                 * fd 1 ahead of text still sitting in fout's buffer.
       +                 */
       +                if(argc > 2)
       +                        Bprint(&fout, "%s:\n", argv[i]);
       +
       +                stringit(argv[i]);
       +        }
       +
       +        exits(0);
       +}
       +
       +/*
       + * Scan the file str and print every run of at least MINSPAN printable
       + * runes to fout, each preceded by the file offset at which it starts.
       + *
       + * A run is emitted in pieces of BUFSIZE-1 runes; every piece but the
       + * last ends in " ..." and the next piece is printed with its own
       + * offset.  The final piece of such a run is printed even when it is
       + * shorter than MINSPAN, so no part of a long string is lost.
       + *
       + * If the file cannot be opened a diagnostic is printed and the
       + * function returns without output.
       + */
       +void
       +stringit(char *str)
       +{
       +        long posn, start;
       +        int cnt = 0;                /* runes collected in buf */
       +        int cont = 0;                /* non-zero while continuing a run already partly printed */
       +        long c;
       +
       +        Rune buf[BUFSIZE];
       +
       +        if ((fin = Bopen(str, OREAD)) == 0) {
       +                perror("open");
       +                return;
       +        }
       +
       +        start = 0;
       +        posn = Boffset(fin);
       +        while((c = Bgetrune(fin)) >= 0) {
       +                if(isprint(c)) {
       +                        /*
       +                         * cnt, not start, tells us whether a run is in
       +                         * progress: offset 0 is a valid start position.
       +                         */
       +                        if(cnt == 0)
       +                                start = posn;
       +                        buf[cnt++] = c;
       +                        if(cnt == BUFSIZE-1) {
       +                                buf[cnt] = 0;
       +                                Bprint(&fout, "%8ld: %S ...\n", start, buf);
       +                                cnt = 0;
       +                                cont = 1;
       +                        }
       +                } else {
       +                        if(cnt >= MINSPAN || (cont && cnt > 0)) {
       +                                buf[cnt] = 0;
       +                                Bprint(&fout, "%8ld: %S\n", start, buf);
       +                        }
       +                        cnt = 0;
       +                        cont = 0;
       +                }
       +                /* offset of the rune the next iteration will read */
       +                posn = Boffset(fin);
       +        }
       +
       +        /* a run may extend to the end of the file */
       +        if(cnt >= MINSPAN || (cont && cnt > 0)){
       +                buf[cnt] = 0;
       +                Bprint(&fout, "%8ld: %S\n", start, buf);
       +        }
       +        Bterm(fin);
       +}
       +
       +/*
       + * Report whether r counts as printable for this program: 1 for runes
       + * from space up to but not including 0x7f and for runes above 0xA0,
       + * 0 for everything else.
       + */
       +int
       +isprint(Rune r)
       +{
       +        if(r < ' ')
       +                return 0;
       +        if(r < 0x7f)
       +                return 1;
       +        return r > 0xA0;
       +}
   DIR diff --git a/unicode/Makefile b/unicode/Makefile
       @@ -0,0 +1,10 @@
       +# unicode - unicode unix port from plan9
       +# Depends on ../lib9
       +
       +TARG      = unicode
       +
       +include ../std.mk
       +
       +pre-uninstall:
       +
       +post-install:
   DIR diff --git a/unicode/unicode.1 b/unicode/unicode.1
   DIR diff --git a/unicode/unicode.c b/unicode/unicode.c
       @@ -0,0 +1,122 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +
       +char        usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] char ... }";
       +char        hex[] = "0123456789abcdefABCDEF";
       +int        numout = 0;
       +int        text = 0;
       +char        *err;
       +Biobuf        bout;
       +
       +char        *range(char*[]);
       +char        *nums(char*[]);
       +char        *chars(char*[]);
       +
       +/*
       + * unicode: pick a conversion based on the flags and the first argument.
       + *   - first argument contains '-' (and -n not given): print ranges
       + *   - -n given, or first argument does not start with a hex digit:
       + *     print the hexadecimal value of each character
       + *   - otherwise: print the characters for the given hex values
       + * The string returned by the chosen routine becomes the exit status.
       + */
       +void
       +main(int argc, char *argv[])
       +{
       +        char *status;
       +
       +        ARGBEGIN{
       +        case 't':
       +                text = 1;
       +                break;
       +        case 'n':
       +                numout = 1;
       +                break;
       +        }ARGEND
       +        Binit(&bout, 1, OWRITE);
       +        if(argc == 0){
       +                fprint(2, "usage: %s\n", usage);
       +                exits("usage");
       +        }
       +        if(!numout && utfrune(argv[0], '-'))
       +                status = range(argv);
       +        else if(numout || strchr(hex, argv[0][0])==0)
       +                status = nums(argv);
       +        else
       +                status = chars(argv);
       +        exits(status);
       +}
       +
       +/*
       + * Print every code point in each "hexmin-hexmax" argument as
       + * "value character", eight entries per output line.
       + *
       + * Returns 0 on success.  On the first malformed argument (not
       + * hex-hex, a value above Runemax, or max below min) a diagnostic is
       + * printed and "bad range" is returned.
       + */
       +char*
       +range(char *argv[])
       +{
       +        char *q;
       +        unsigned long lo, hi;
       +        int min, max;
       +        int i;
       +
       +        while(*argv){
       +                q = *argv;
       +                /* strchr() also finds the terminating NUL, so test for an empty string first */
       +                if(q[0] == 0 || strchr(hex, q[0]) == 0){
       +    err:
       +                        fprint(2, "unicode: bad range %s\n", *argv);
       +                        return "bad range";
       +                }
       +                /*
       +                 * Validate in unsigned long, the type strtoul returns:
       +                 * storing the result in an int first would silently drop
       +                 * high bits where long is wider than int.
       +                 */
       +                lo = strtoul(q, &q, 16);
       +                if(lo>Runemax || *q!='-')
       +                        goto err;
       +                q++;
       +                if(*q == 0 || strchr(hex, *q) == 0)
       +                        goto err;
       +                hi = strtoul(q, &q, 16);
       +                if(hi>Runemax || hi<lo || *q!=0)
       +                        goto err;
       +                min = lo;
       +                max = hi;
       +                i = 0;
       +                do{
       +                        Bprint(&bout, "%.4x %C", min, min);
       +                        i++;
       +                        /* newline after every eighth entry and after the last one */
       +                        if(min==max || (i&7)==0)
       +                                Bprint(&bout, "\n");
       +                        else
       +                                Bprint(&bout, "\t");
       +                        min++;
       +                }while(min<=max);
       +                argv++;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * Print the hexadecimal value of every character in every argument,
       + * one value per line.
       + *
       + * Returns 0 on success.  On the first argument containing a malformed
       + * UTF sequence a diagnostic is printed and "bad utf" is returned.
       + */
       +char*
       +nums(char *argv[])
       +{
       +        char *q;
       +        Rune r;
       +        int w;
       +
       +        while(*argv){
       +                q = *argv;
       +                while(*q){
       +                        w = chartorune(&r, q);
       +                        /*
       +                         * chartorune() signals a malformed sequence by
       +                         * storing Runeerror and consuming a single byte;
       +                         * a genuine Runeerror character in the input
       +                         * occupies more than one byte.  Use the library
       +                         * constant rather than a hard-coded 0x80.
       +                         */
       +                        if(r==Runeerror && w==1){
       +                                fprint(2, "unicode: invalid utf string %s\n", *argv);
       +                                return "bad utf";
       +                        }
       +                        Bprint(&bout, "%.4x\n", r);
       +                        q += w;
       +                }
       +                argv++;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * Print the character for each hexadecimal code point argument.
       + * Each character is followed by a newline unless the text flag (-t)
       + * is set.
       + *
       + * Returns 0 on success.  On the first argument that is empty, is not
       + * a hexadecimal number, or exceeds Runemax, a diagnostic is printed
       + * and "bad char" is returned.
       + */
       +char*
       +chars(char *argv[])
       +{
       +        char *q;
       +        unsigned long v;
       +        int m;
       +
       +        while(*argv){
       +                q = *argv;
       +                /* strchr() also finds the terminating NUL, so test for an empty string first */
       +                if(q[0] == 0 || strchr(hex, q[0]) == 0){
       +    err:
       +                        fprint(2, "unicode: bad unicode value %s\n", *argv);
       +                        return "bad char";
       +                }
       +                /*
       +                 * Validate in unsigned long, the type strtoul returns:
       +                 * storing the result in an int first would silently drop
       +                 * high bits where long is wider than int.
       +                 */
       +                v = strtoul(q, &q, 16);
       +                if(v>Runemax || *q!=0)
       +                        goto err;
       +                m = v;
       +                Bprint(&bout, "%C", m);
       +                if(!text)
       +                        Bprint(&bout, "\n");
       +                argv++;
       +        }
       +        return 0;
       +}
   DIR diff --git a/unutf/Makefile b/unutf/Makefile
       @@ -0,0 +1,10 @@
       +# unutf - unutf unix port from plan9
       +# Depends on ../lib9
       +
       +TARG      = unutf
       +
       +include ../std.mk
       +
       +pre-uninstall:
       +
       +post-install:
   DIR diff --git a/unutf/unutf.1 b/unutf/unutf.1
   DIR diff --git a/unutf/unutf.c b/unutf/unutf.c
       @@ -0,0 +1,20 @@
       +/*
       + * stupid little program to pipe unicode chars through 
       + * when converting to non-utf compilers.
       + */
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +
       +Biobuf bin;
       +
       +/*
       + * Read runes from standard input and print each one on its own line
       + * as a 0x-prefixed number, until Bgetrune() returns a negative value.
       + */
       +void
       +main(void)
       +{
       +        int r;
       +
       +        Binit(&bin, 0, OREAD);
       +        for(;;) {
       +                r = Bgetrune(&bin);
       +                if(r < 0)
       +                        break;
       +                print("0x%ux\n", r);
       +        }
       +        exits(0);
       +}