URI: 
       toops - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit 30d57c74899f4825349441e2b1e7a0bb5995c21f
   DIR parent 3e0d8fb3ea83b2b65a6425c65beda887140f9349
  HTML Author: rsc <devnull@localhost>
       Date:   Tue, 27 Dec 2005 23:16:48 +0000
       
       oops
       
       Diffstat:
         A src/cmd/tcs/html.c                  |     330 +++++++++++++++++++++++++++++++
       
       1 file changed, 330 insertions(+), 0 deletions(-)
       ---
   DIR diff --git a/src/cmd/tcs/html.c b/src/cmd/tcs/html.c
       t@@ -0,0 +1,330 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include "hdr.h"
       +#include "conv.h"
       +
       +typedef struct Hchar Hchar;
       +struct Hchar
       +{
       +        char *s;
       +        Rune r;
       +};
       +
       +/* &lt;, &gt;, &quot;, &amp; intentionally omitted */
       +
       +static Hchar byname[] =
       +{
       +        {"AElig", 198},
       +        {"Aacute", 193},
       +        {"Acirc", 194},
       +        {"Agrave", 192},
       +        {"Aring", 197},
       +        {"Atilde", 195},
       +        {"Auml", 196},
       +        {"Ccedil", 199},
       +        {"ETH", 208},
       +        {"Eacute", 201},
       +        {"Ecirc", 202},
       +        {"Egrave", 200},
       +        {"Euml", 203},
       +        {"Iacute", 205},
       +        {"Icirc", 206},
       +        {"Igrave", 204},
       +        {"Iuml", 207},
       +        {"Ntilde", 209},
       +        {"Oacute", 211},
       +        {"Ocirc", 212},
       +        {"Ograve", 210},
       +        {"Oslash", 216},
       +        {"Otilde", 213},
       +        {"Ouml", 214},
       +        {"THORN", 222},
       +        {"Uacute", 218},
       +        {"Ucirc", 219},
       +        {"Ugrave", 217},
       +        {"Uuml", 220},
       +        {"Yacute", 221},
       +        {"aacute", 225},
       +        {"acirc", 226},
       +        {"acute", 180},
       +        {"aelig", 230},
       +        {"agrave", 224},
       +        {"alpha", 945},
       +        {"aring", 229},
       +        {"atilde", 227},
       +        {"auml", 228},
       +        {"beta", 946},
       +        {"brvbar", 166},
       +        {"ccedil", 231},
       +        {"cdots", 8943},
       +        {"cedil", 184},
       +        {"cent", 162},
       +        {"chi", 967},
       +        {"copy", 169},
       +        {"curren", 164},
       +        {"ddots", 8945},
       +        {"deg", 176},
       +        {"delta", 948},
       +        {"divide", 247},
       +        {"eacute", 233},
       +        {"ecirc", 234},
       +        {"egrave", 232},
       +        {"emdash", 8212},        /* non-standard but commonly used */
       +        {"emsp", 8195},
       +        {"endash", 8211},        /* non-standard but commonly used */
       +        {"ensp", 8194},
       +        {"epsilon", 949},
       +        {"eta", 951},
       +        {"eth", 240},
       +        {"euml", 235},
       +        {"frac12", 189},
       +        {"frac14", 188},
       +        {"frac34", 190},
       +        {"gamma", 947},
       +        {"iacute", 237},
       +        {"icirc", 238},
       +        {"iexcl", 161},
       +        {"igrave", 236},
       +        {"iota", 953},
       +        {"iquest", 191},
       +        {"iuml", 239},
       +        {"kappa", 954},
       +        {"lambda", 955},
       +        {"laquo", 171},
       +        {"ldquo", 8220},
       +        {"ldots", 8230},
       +        {"lsquo", 8216},
       +        {"macr", 175},
       +        {"mdash", 8212},
       +        {"micro", 181},
       +        {"middot", 183},
       +        {"mu", 956},
       +        {"nbsp", 160},
       +        {"ndash", 8211},
       +        {"not", 172},
       +        {"ntilde", 241},
       +        {"nu", 957},
       +        {"oacute", 243},
       +        {"ocirc", 244},
       +        {"ograve", 242},
       +        {"omega", 969},
       +        {"omicron", 959},
       +        {"ordf", 170},
       +        {"ordm", 186},
       +        {"oslash", 248},
       +        {"otilde", 245},
       +        {"ouml", 246},
       +        {"para", 182},
       +        {"phi", 966},
       +        {"pi", 960},
       +        {"plusmn", 177},
       +        {"pound", 163},
       +        {"psi", 968},
       +        {"quad", 8193},
       +        {"raquo", 187},
       +        {"rdquo", 8221},
       +        {"reg", 174},
       +        {"rho", 961},
       +        {"rsquo", 8217},
       +        {"sect", 167},
       +        {"shy", 173},
       +        {"sigma", 963},
       +        {"sp", 8194},
       +        {"sup1", 185},
       +        {"sup2", 178},
       +        {"sup3", 179},
       +        {"szlig", 223},
       +        {"tau", 964},
       +        {"theta", 952},
       +        {"thinsp", 8201},
       +        {"thorn", 254},
       +        {"times", 215},
       +        {"trade", 8482},
       +        {"uacute", 250},
       +        {"ucirc", 251},
       +        {"ugrave", 249},
       +        {"uml", 168},
       +        {"upsilon", 965},
       +        {"uuml", 252},
       +        {"varepsilon", 8712},
       +        {"varphi", 981},
       +        {"varpi", 982},
       +        {"varrho", 1009},
       +        {"vdots", 8942},
       +        {"vsigma", 962},
       +        {"vtheta", 977},
       +        {"xi", 958},
       +        {"yacute", 253},
       +        {"yen", 165},
       +        {"yuml", 255},
       +        {"zeta", 950}
       +};
       +
       +static Hchar byrune[nelem(byname)];
       +
       +static int
       +hnamecmp(const void *va, const void *vb)
       +{
       +        Hchar *a, *b;
       +        
       +        a = (Hchar*)va;
       +        b = (Hchar*)vb;
       +        return strcmp(a->s, b->s);
       +}
       +
       +static int
       +hrunecmp(const void *va, const void *vb)
       +{
       +        Hchar *a, *b;
       +        
       +        a = (Hchar*)va;
       +        b = (Hchar*)vb;
       +        return a->r - b->r;
       +}
       +
       +static void
       +html_init(void)
       +{
       +        static int init;
       +        
       +        if(init)
       +                return;
       +        init = 1;
       +        memmove(byrune, byname, sizeof byrune);
       +        qsort(byname, nelem(byname), sizeof byname[0], hnamecmp);
       +        qsort(byrune, nelem(byrune), sizeof byrune[0], hrunecmp);
       +}
       +
       +static Rune
       +findbyname(char *s)
       +{
       +        Hchar *h;
       +        int n, m, x;
       +        
       +        h = byname;
       +        n = nelem(byname);
       +        while(n > 0){
       +                m = n/2;
       +                x = strcmp(h[m].s, s);
       +                if(x == 0)
       +                        return h[m].r;
       +                if(x < 0){
       +                        h += m+1;
       +                        n -= m+1;
       +                }else
       +                        n = m;
       +        }
       +        return Runeerror;
       +}
       +
       +static char*
       +findbyrune(Rune r)
       +{
       +        Hchar *h;
       +        int n, m;
       +
       +        h = byrune;
       +        n = nelem(byrune);
       +        while(n > 0){
       +                m = n/2;
       +                if(h[m].r == r)
       +                        return h[m].s;
       +                if(h[m].r < r){
       +                        h += m+1;
       +                        n -= m+1;
       +                }else
       +                        n = m;
       +        }
       +        return nil;
       +}
       +
       +void
       +html_in(int fd, long *x, struct convert *out)
       +{
       +        char buf[100], *p;
       +        Biobuf b;
       +        Rune rbuf[N];
       +        Rune *r, *er;
       +        int c, i;
       +        
       +        USED(x);
       +        
       +        html_init();
       +        r = rbuf;
       +        er = rbuf+N;
       +        Binit(&b, fd, OREAD);
       +        while((c = Bgetrune(&b)) != Beof){
       +                if(r >= er){
       +                        OUT(out, rbuf, r-rbuf);
       +                        r = rbuf;
       +                }
       +                if(c == '&'){
       +                        buf[0] = c;
       +                        for(i=1; i<nelem(buf)-1;){
       +                                c = Bgetc(&b);
       +                                if(c == Beof)
       +                                        break;
       +                                buf[i++] = c;
       +                                if(strchr("; \t\r\n", c))
       +                                        break;
       +                        }
       +                        buf[i] = 0;
       +                        if(buf[i-1] == ';'){
       +                                buf[i-1] = 0;
       +                                if((c = findbyname(buf+1)) != Runeerror){
       +                                        *r++ = c;
       +                                        continue;
       +                                }
       +                                buf[i-1] = ';';
       +                                if(buf[1] == '#'){
       +                                        if(buf[2] == 'x')
       +                                                c = strtol(buf+3, &p, 16);
       +                                        else
       +                                                c = strtol(buf+2, &p, 10);
       +                                        if(*p != ';' || c >= NRUNE || c < 0)
       +                                                goto bad;
       +                                        *r++ = c;
       +                                        continue;
       +                                }
       +                        }
       +                bad:
       +                        for(p=buf; p<buf+i; ){
       +                                p += chartorune(r++, p);
       +                                if(r >= er){
       +                                        OUT(out, rbuf, r-rbuf);
       +                                        r = rbuf;
       +                                }
       +                        }
       +                        continue;
       +                }
       +                *r++ = c;
       +        }
       +        if(r > rbuf)
       +                OUT(out, rbuf, r-rbuf);
       +}
       +
       +/*
       + * use biobuf because can use more than UTFmax bytes per rune
       + */
       +void
       +html_out(Rune *r, int n, long *x)
       +{
       +        char *s;
       +        Biobuf b;
       +        Rune *er;
       +        
       +        html_init();
       +        Binit(&b, 1, OWRITE);
       +        er = r+n;
       +        for(; r<er; r++){
       +                if(*r < Runeself)
       +                        Bputrune(&b, *r);
       +                else if((s = findbyrune(*r)) != nil)
       +                        Bprint(&b, "&%s;", s);
       +                else
       +                        Bprint(&b, "&#%04x;", *r);
       +        }
       +        Bflush(&b);
       +}
       +