mx1.adamsgaard.dk/1/src/plan9port/commit/536f9b83c0bed9986800d806c74ae4d225628fe3.gph

  URI:

       timport from plan9 - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit 536f9b83c0bed9986800d806c74ae4d225628fe3
   DIR parent 44fc56d8c3cc534bf903133c63a9c9ecb42e5b63
  HTML Author: rsc <devnull@localhost>
       Date:   Sun, 21 May 2006 18:57:51 +0000
       
       import from plan9
       
       Diffstat:
         M src/cmd/tcs/conv.h                  |       2 ++
         M src/cmd/tcs/conv_big5.c             |       1 +
         M src/cmd/tcs/conv_gb.c               |       1 +
         M src/cmd/tcs/conv_jis.c              |       1 +
         M src/cmd/tcs/conv_ksc.c              |       1 +
         M src/cmd/tcs/html.c                  |     128 ++++++++++++++++++++++++++++++-
         M src/cmd/tcs/mkfile                  |       6 +++++-
         M src/cmd/tcs/tcs.c                   |     212 ++++++++++++++++++++++++-------
         M src/cmd/tcs/utf.c                   |      30 ++++++++++++++++--------------
       
       9 files changed, 320 insertions(+), 62 deletions(-)
       ---
   DIR diff --git a/src/cmd/tcs/conv.h b/src/cmd/tcs/conv.h
       t@@ -13,6 +13,8 @@ void uksc_in(int fd, long *notused, struct convert *out);
        void uksc_out(Rune *base, int n, long *notused);
        void html_in(int fd, long *notused, struct convert *out);
        void html_out(Rune *base, int n, long *notused);
       +void tune_in(int fd, long *notused, struct convert *out);
       +void tune_out(Rune *base, int n, long *notused);
        
        #define                emit(x)                *(*r)++ = (x)
        #define                NRUNE                65536
   DIR diff --git a/src/cmd/tcs/conv_big5.c b/src/cmd/tcs/conv_big5.c
       t@@ -110,6 +110,7 @@ big5_in(int fd, long *notused, struct convert *out)
                big5proc(-1, &r, nin);
                if(r > ob)
                        OUT(out, ob, r-ob);
       +        OUT(out, ob, 0);
        }
        
        void
   DIR diff --git a/src/cmd/tcs/conv_gb.c b/src/cmd/tcs/conv_gb.c
       t@@ -88,6 +88,7 @@ gb_in(int fd, long *notused, struct convert *out)
                gbproc(-1, &r, nin);
                if(r > ob)
                        OUT(out, ob, r-ob);
       +        OUT(out, ob, 0);
        }
        
        void
   DIR diff --git a/src/cmd/tcs/conv_jis.c b/src/cmd/tcs/conv_jis.c
       t@@ -363,6 +363,7 @@ do_in(int fd, void (*procfn)(int, Rune **, long), struct convert *out)
                (*procfn)(-1, &r, nin);
                if(r > ob)
                        OUT(out, ob, r-ob);
       +        OUT(out, ob, 0);
        }
        
        void
   DIR diff --git a/src/cmd/tcs/conv_ksc.c b/src/cmd/tcs/conv_ksc.c
       t@@ -109,6 +109,7 @@ uksc_in(int fd, long *notused, struct convert *out)
                ukscproc(-1, &r, nin);
                if(r > ob)
                        OUT(out, ob, r-ob);
       +        OUT(out, ob, 0);
        }
        
        void
   DIR diff --git a/src/cmd/tcs/html.c b/src/cmd/tcs/html.c
       t@@ -19,132 +19,251 @@ static Hchar byname[] =
                {"Aacute", 193},
                {"Acirc", 194},
                {"Agrave", 192},
       +        {"Alpha", 913},
                {"Aring", 197},
                {"Atilde", 195},
                {"Auml", 196},
       +        {"Beta", 914},
                {"Ccedil", 199},
       +        {"Chi", 935},
       +        {"Dagger", 8225},
       +        {"Delta", 916},
                {"ETH", 208},
                {"Eacute", 201},
                {"Ecirc", 202},
                {"Egrave", 200},
       +        {"Epsilon", 917},
       +        {"Eta", 919},
                {"Euml", 203},
       +        {"Gamma", 915},
                {"Iacute", 205},
                {"Icirc", 206},
                {"Igrave", 204},
       +        {"Iota", 921},
                {"Iuml", 207},
       +        {"Kappa", 922},
       +        {"Lambda", 923},
       +        {"Mu", 924},
                {"Ntilde", 209},
       +        {"Nu", 925},
       +        {"OElig", 338},
                {"Oacute", 211},
                {"Ocirc", 212},
                {"Ograve", 210},
       +        {"Omega", 937},
       +        {"Omicron", 927},
                {"Oslash", 216},
                {"Otilde", 213},
                {"Ouml", 214},
       +        {"Phi", 934},
       +        {"Pi", 928},
       +        {"Prime", 8243},
       +        {"Psi", 936},
       +        {"Rho", 929},
       +        {"Scaron", 352},
       +        {"Sigma", 931},
                {"THORN", 222},
       +        {"Tau", 932},
       +        {"Theta", 920},
                {"Uacute", 218},
                {"Ucirc", 219},
                {"Ugrave", 217},
       +        {"Upsilon", 933},
                {"Uuml", 220},
       +        {"Xi", 926},
                {"Yacute", 221},
       +        {"Yuml", 376},
       +        {"Zeta", 918},
                {"aacute", 225},
                {"acirc", 226},
                {"acute", 180},
                {"aelig", 230},
                {"agrave", 224},
       +        {"alefsym", 8501},
                {"alpha", 945},
       +        {"amp", 38},
       +        {"and", 8743},
       +        {"ang", 8736},
                {"aring", 229},
       +        {"asymp", 8776},
                {"atilde", 227},
                {"auml", 228},
       +        {"bdquo", 8222},
                {"beta", 946},
                {"brvbar", 166},
       +        {"bull", 8226},
       +        {"cap", 8745},
                {"ccedil", 231},
                {"cdots", 8943},
                {"cedil", 184},
                {"cent", 162},
                {"chi", 967},
       +        {"circ", 710},
       +        {"clubs", 9827},
       +        {"cong", 8773},
                {"copy", 169},
       +        {"crarr", 8629},
       +        {"cup", 8746},
                {"curren", 164},
       +        {"dArr", 8659},
       +        {"dagger", 8224},
       +        {"darr", 8595},
                {"ddots", 8945},
                {"deg", 176},
                {"delta", 948},
       +        {"diams", 9830},
                {"divide", 247},
                {"eacute", 233},
                {"ecirc", 234},
                {"egrave", 232},
                {"emdash", 8212},        /* non-standard but commonly used */
       +        {"empty", 8709},
                {"emsp", 8195},
                {"endash", 8211},        /* non-standard but commonly used */
                {"ensp", 8194},
                {"epsilon", 949},
       +        {"equiv", 8801},
                {"eta", 951},
                {"eth", 240},
                {"euml", 235},
       +        {"euro", 8364},
       +        {"exist", 8707},
       +        {"fnof", 402},
       +        {"forall", 8704},
                {"frac12", 189},
                {"frac14", 188},
                {"frac34", 190},
       +        {"frasl", 8260},
                {"gamma", 947},
       +        {"ge", 8805},
       +        {"gt", 62},
       +        {"hArr", 8660},
       +        {"harr", 8596},
       +        {"hearts", 9829},
       +        {"hellip", 8230},
                {"iacute", 237},
                {"icirc", 238},
                {"iexcl", 161},
                {"igrave", 236},
       +        {"image", 8465},
       +        {"infin", 8734},
       +        {"int", 8747},
                {"iota", 953},
                {"iquest", 191},
       +        {"isin", 8712},
                {"iuml", 239},
                {"kappa", 954},
       +        {"lArr", 8656},
                {"lambda", 955},
       +        {"lang", 9001},
                {"laquo", 171},
       -        {"ldquo", 8220},
       +        {"larr", 8592},
       +        {"lceil", 8968},
                {"ldots", 8230},
       +        {"ldquo", 8220},
       +        {"le", 8804},
       +        {"lfloor", 8970},
       +        {"lowast", 8727},
       +        {"loz", 9674},
       +        {"lrm", 8206},
       +        {"lsaquo", 8249},
                {"lsquo", 8216},
       +        {"lt", 60},
                {"macr", 175},
                {"mdash", 8212},
                {"micro", 181},
                {"middot", 183},
       +        {"minus", 8722},
                {"mu", 956},
       +        {"nabla", 8711},
                {"nbsp", 160},
                {"ndash", 8211},
       +        {"ne", 8800},
       +        {"ni", 8715},
                {"not", 172},
       +        {"notin", 8713},
       +        {"nsub", 8836},
                {"ntilde", 241},
                {"nu", 957},
                {"oacute", 243},
                {"ocirc", 244},
       +        {"oelig", 339},
                {"ograve", 242},
       +        {"oline", 8254},
                {"omega", 969},
                {"omicron", 959},
       +        {"oplus", 8853},
       +        {"or", 8744},
                {"ordf", 170},
                {"ordm", 186},
                {"oslash", 248},
                {"otilde", 245},
       +        {"otimes", 8855},
                {"ouml", 246},
                {"para", 182},
       +        {"part", 8706},
       +        {"permil", 8240},
       +        {"perp", 8869},
                {"phi", 966},
                {"pi", 960},
       +        {"piv", 982},
                {"plusmn", 177},
                {"pound", 163},
       +        {"prime", 8242},
       +        {"prod", 8719},
       +        {"prop", 8733},
                {"psi", 968},
                {"quad", 8193},
       +        {"quot", 34},
       +        {"rArr", 8658},
       +        {"radic", 8730},
       +        {"rang", 9002},
                {"raquo", 187},
       +        {"rarr", 8594},
       +        {"rceil", 8969},
                {"rdquo", 8221},
       +        {"real", 8476},
                {"reg", 174},
       +        {"rfloor", 8971},
                {"rho", 961},
       +        {"rlm", 8207},
       +        {"rsaquo", 8250},
                {"rsquo", 8217},
       +        {"sbquo", 8218},
       +        {"scaron", 353},
       +        {"sdot", 8901},
                {"sect", 167},
                {"shy", 173},
                {"sigma", 963},
       +        {"sigmaf", 962},
       +        {"sim", 8764},
                {"sp", 8194},
       +        {"spades", 9824},
       +        {"sub", 8834},
       +        {"sube", 8838},
       +        {"sum", 8721},
       +        {"sup", 8835},
                {"sup1", 185},
                {"sup2", 178},
                {"sup3", 179},
       +        {"supe", 8839},
                {"szlig", 223},
                {"tau", 964},
       +        {"there4", 8756},
                {"theta", 952},
       +        {"thetasym", 977},
                {"thinsp", 8201},
                {"thorn", 254},
       +        {"tilde", 732},
                {"times", 215},
                {"trade", 8482},
       +        {"uArr", 8657},
                {"uacute", 250},
       +        {"uarr", 8593},
                {"ucirc", 251},
                {"ugrave", 249},
                {"uml", 168},
       +        {"upsih", 978},
                {"upsilon", 965},
                {"uuml", 252},
                {"varepsilon", 8712},
       t@@ -154,11 +273,14 @@ static Hchar byname[] =
                {"vdots", 8942},
                {"vsigma", 962},
                {"vtheta", 977},
       +        {"weierp", 8472},
                {"xi", 958},
                {"yacute", 253},
                {"yen", 165},
                {"yuml", 255},
       -        {"zeta", 950}
       +        {"zeta", 950},
       +        {"zwj", 8205},
       +        {"zwnj", 8204}
        };
        
        static Hchar byrune[nelem(byname)];
       t@@ -302,6 +424,7 @@ html_in(int fd, long *x, struct convert *out)
                }
                if(r > rbuf)
                        OUT(out, rbuf, r-rbuf);
       +        OUT(out, rbuf, 0);
        }
        
        /*
       t@@ -314,6 +437,7 @@ html_out(Rune *r, int n, long *x)
                Biobuf b;
                Rune *er;
                
       +        USED(x);
                html_init();
                Binit(&b, 1, OWRITE);
                er = r+n;
   DIR diff --git a/src/cmd/tcs/mkfile b/src/cmd/tcs/mkfile
       t@@ -11,7 +11,8 @@ OFILES=tcs.$O\
                kuten208.$O\
                gb.$O\
                ksc.$O\
       -        big5.$O
       +        big5.$O\
       +        tune.$O\
        
        <$PLAN9/src/mkone
        CFLAGS= -DPLAN9 $CFLAGS
       t@@ -23,6 +24,9 @@ tcs.$O big5.$O:                big5.h
        tcs.$O gb.$O:                gb.h
        tcs.$O:                        cyrillic.h
        tcs.$O:                        conv.h
       +tcs.$O:        8859.h
       +tcs.$O:        ms.h
       +tcs.$O:        misc.h
        conv%.$O:                conv.h
        conv_ksc.$O:                ksc.h
        
   DIR diff --git a/src/cmd/tcs/tcs.c b/src/cmd/tcs/tcs.c
       t@@ -54,7 +54,7 @@ main(int argc, char **argv)
                        clean = 1;
                        break;
                case 'f':
       -                from = ARGF();
       +                from = EARGF(usage());
                        break;        
                case 'l':
                        listem = 1;
       t@@ -63,7 +63,7 @@ main(int argc, char **argv)
                        squawk = 0;
                        break;
                case 't':
       -                to = ARGF();
       +                to = EARGF(usage());
                        break;
                case 'v':
                        verbose = 1;
       t@@ -160,7 +160,7 @@ conv(char *name, int from)
                struct convert *c;
        
                for(c = convert; c->name; c++){
       -                if(strcmp(c->name, name) != 0)
       +                if(cistrcmp(c->name, name) != 0)
                                continue;
                        if(c->flags&Table)
                                return(c);
       t@@ -208,23 +208,79 @@ unicode_in(int fd, long *notused, struct convert *out)
                }
                while((n = read(fd, (char *)buf, 2*N)) > 0){
                        ninput += n;
       +                if(swabme)
       +                        swab2((char *)buf, n);
                        if(n&1){
                                if(squawk)
                                        EPR "%s: odd byte count in %s\n", argv0, file);
                                nerrors++;
                                if(clean)
                                        n--;
       -                        else {
       -                                n++;
       -                                buf[n/2] = Runeerror;
       -                                if(swabme)        /* swab so later swab undoes it */
       -                                        swab2((char *)&buf[n/2], 2);
       -                        }
       +                        else
       +                                buf[n++/2] = Runeerror;
       +                }
       +                OUT(out, buf, n/2);
       +        }
       +}
       +
       +void
       +unicode_in_be(int fd, long *notused, struct convert *out)
       +{
       +        int i, n;
       +        Rune buf[N], r;
       +        uchar *p;
       +
       +        USED(notused);
       +        while((n = read(fd, (char *)buf, 2*N)) > 0){
       +                ninput += n;
       +                p = (uchar*)buf;
       +                for(i=0; i<n/2; i++){
       +                        r = *p++<<8;
       +                        r |= *p++;
       +                        buf[i] = r;
       +                }
       +                if(n&1){
       +                        if(squawk)
       +                                EPR "%s: odd byte count in %s\n", argv0, file);
       +                        nerrors++;
       +                        if(clean)
       +                                n--;
       +                        else
       +                                buf[n++/2] = Runeerror;
                        }
       -                if(swabme)
       -                        swab2((char *)buf, n);
                        OUT(out, buf, n/2);
                }
       +        OUT(out, buf, 0);
       +}
       +
       +void
       +unicode_in_le(int fd, long *notused, struct convert *out)
       +{
       +        int i, n;
       +        Rune buf[N], r;
       +        uchar *p;
       +
       +        USED(notused);
       +        while((n = read(fd, (char *)buf, 2*N)) > 0){
       +                ninput += n;
       +                p = (uchar*)buf;
       +                for(i=0; i<n/2; i++){
       +                        r = *p++;
       +                        r |= *p++<<8;
       +                        buf[i] = r;
       +                }
       +                if(n&1){
       +                        if(squawk)
       +                                EPR "%s: odd byte count in %s\n", argv0, file);
       +                        nerrors++;
       +                        if(clean)
       +                                n--;
       +                        else
       +                                buf[n++/2] = Runeerror;
       +                }
       +                OUT(out, buf, n/2);
       +        }
       +        OUT(out, buf, 0);
        }
        
        void
       t@@ -245,6 +301,44 @@ unicode_out(Rune *base, int n, long *notused)
        }
        
        void
       +unicode_out_be(Rune *base, int n, long *notused)
       +{
       +        int i;
       +        uchar *p;
       +        Rune r;
       +
       +        USED(notused);
       +        p = (uchar*)base;
       +        for(i=0; i<n; i++){
       +                r = base[i];
       +                *p++ = r>>8;
       +                *p++ = r;
       +        }
       +        nrunes += n;
       +        noutput += 2*n;
       +        write(1, (char *)base, 2*n);
       +}
       +
       +void
       +unicode_out_le(Rune *base, int n, long *notused)
       +{
       +        int i;
       +        uchar *p;
       +        Rune r;
       +
       +        USED(notused);
       +        p = (uchar*)base;
       +        for(i=0; i<n; i++){
       +                r = base[i];
       +                *p++ = r;
       +                *p++ = r>>8;
       +        }
       +        nrunes += n;
       +        noutput += 2*n;
       +        write(1, (char *)base, 2*n);
       +}
       +
       +void
        intable(int fd, long *table, struct convert *out)
        {
                uchar buf[N];
       t@@ -270,6 +364,7 @@ intable(int fd, long *table, struct convert *out)
                        }
                        OUT(out, runes, r-runes);
                }
       +        OUT(out, runes, 0);
                if(n < 0){
        #ifdef        PLAN9
                        EPR "%s: input read: %r\n", argv0);
       t@@ -403,64 +498,91 @@ struct convert convert[] =
                { "av", "Alternativnyj Variant", Table, (void *)tabav },
                { "big5", "Big 5 (HKU)", From|Func, 0, (Fnptr)big5_in },
                { "big5", "Big 5 (HKU)", Func, 0, (Fnptr)big5_out },
       -        { "cp437", "Code Page 437 (US)", Table, (void*)tabcp437 },
       -        { "cp720", "Code Page 720 (Arabic)", Table, (void*)tabcp720 },
       -        { "cp737", "Code Page 737 (Greek)", Table, (void*)tabcp737 },
       -        { "cp775", "Code Page 775 (Baltic)", Table, (void*)tabcp775 },
       -        { "cp850", "Code Page 850 (Multilingual Latin I)", Table, (void*)tabcp850 },
       -        { "cp852", "Code Page 852 (Latin II)", Table, (void*)tabcp852 },
       -        { "cp855", "Code Page 855 (Cyrillic)", Table, (void*)tabcp855 },
       -        { "cp857", "Code Page 857 (Turkish)", Table, (void*)tabcp857 },
       -        { "cp858", "Code Page 858 (Multilingual Latin I+Euro)", Table, (void*)tabcp858 },
       -        { "cp862", "Code Page 862 (Hebrew)", Table, (void*)tabcp862 },
       -        { "cp866", "Code Page 866 (Russian)", Table, (void*)tabcp866 },
       -        { "cp874", "Code Page 874 (Thai)", Table, (void*)tabcp874 },
       -        { "cp1250", "Code Page 1250 (Central Europe)", Table, (void *)tabcp1250 },
       -        { "cp1251", "Code Page 1251 (Cyrillic)", Table, (void *)tabcp1251 },
       -        { "cp1252", "Code Page 1252 (Latin I)", Table, (void *)tabcp1252 },
       -        { "cp1253", "Code Page 1253 (Greek)", Table, (void *)tabcp1253 },
       -        { "cp1254", "Code Page 1254 (Turkish)", Table, (void *)tabcp1254 },
       -        { "cp1255", "Code Page 1255 (Hebrew)", Table, (void *)tabcp1255 },
       -        { "cp1256", "Code Page 1256 (Arabic)", Table, (void *)tabcp1256 },
       -        { "cp1257", "Code Page 1257 (Baltic)", Table, (void *)tabcp1257 },
       -        { "cp1258", "Code Page 1258 (Vietnam)", Table, (void *)tabcp1258 },
                { "ebcdic", "EBCDIC", Table, (void *)tabebcdic },        /* 6f is recommended bad map */
                { "euc-k", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uksc_in },
                { "euc-k", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_out },
       -        { "gb", "GB2312-80 (Chinese)", From|Func, 0, (Fnptr)gb_in },
       -        { "gb", "GB2312-80 (Chinese)", Func, 0, (Fnptr)gb_out },
       +        { "gb2312", "GB2312-80 (Chinese)", From|Func, 0, (Fnptr)gb_in },
       +        { "gb2312", "GB2312-80 (Chinese)", Func, 0, (Fnptr)gb_out },
                { "html", "HTML", From|Func, 0, (Fnptr)html_in },
                { "html", "HTML", Func, 0, (Fnptr)html_out },
       +        { "ibm437", "IBM Code Page 437 (US)", Table, (void*)tabcp437 },
       +        { "ibm720", "IBM Code Page 720 (Arabic)", Table, (void*)tabcp720 },
       +        { "ibm737", "IBM Code Page 737 (Greek)", Table, (void*)tabcp737 },
       +        { "ibm775", "IBM Code Page 775 (Baltic)", Table, (void*)tabcp775 },
       +        { "ibm850", "IBM Code Page 850 (Multilingual Latin I)", Table, (void*)tabcp850 },
       +        { "ibm852", "IBM Code Page 852 (Latin II)", Table, (void*)tabcp852 },
       +        { "ibm855", "IBM Code Page 855 (Cyrillic)", Table, (void*)tabcp855 },
       +        { "ibm857", "IBM Code Page 857 (Turkish)", Table, (void*)tabcp857 },
       +        { "ibm858", "IBM Code Page 858 (Multilingual Latin I+Euro)", Table, (void*)tabcp858 },
       +        { "ibm862", "IBM Code Page 862 (Hebrew)", Table, (void*)tabcp862 },
       +        { "ibm866", "IBM Code Page 866 (Russian)", Table, (void*)tabcp866 },
       +        { "ibm874", "IBM Code Page 874 (Thai)", Table, (void*)tabcp874 },
       +        { "iso-2022-jp", "alias for jis-kanji (MIME)", From|Func, 0, (Fnptr)jisjis_in },
       +        { "iso-2022-jp", "alias for jis-kanji (MIME)", Func, 0, (Fnptr)jisjis_out },
       +        { "iso-8859-1", "alias for 8859-1 (MIME)", Table, (void *)tab8859_1 },
       +        { "iso-8859-2", "alias for 8859-2 (MIME)", Table, (void *)tab8859_2 },
       +        { "iso-8859-3", "alias for 8859-3 (MIME)", Table, (void *)tab8859_3 },
       +        { "iso-8859-4", "alias for 8859-4 (MIME)", Table, (void *)tab8859_4 },
       +        { "iso-8859-5", "alias for 8859-5 (MIME)", Table, (void *)tab8859_5 },
       +        { "iso-8859-6", "alias for 8859-6 (MIME)", Table, (void *)tab8859_6 },
       +        { "iso-8859-7", "alias for 8859-7 (MIME)", Table, (void *)tab8859_7 },
       +        { "iso-8859-8", "alias for 8859-8 (MIME)", Table, (void *)tab8859_8 },
       +        { "iso-8859-9", "alias for 8859-9 (MIME)", Table, (void *)tab8859_9 },
       +        { "iso-8859-10", "alias for 8859-10 (MIME)", Table, (void *)tab8859_10 },
       +        { "iso-8859-15", "alias for 8859-15 (MIME)", Table, (void *)tab8859_15 },
                { "jis", "guesses at the JIS encoding", From|Func, 0, (Fnptr)jis_in },
                { "jis-kanji", "ISO 2022-JP (Japanese)", From|Func, 0, (Fnptr)jisjis_in },
                { "jis-kanji", "ISO 2022-JP (Japanese)", Func, 0, (Fnptr)jisjis_out },
                { "koi8", "KOI-8 (GOST 19769-74)", Table, (void *)tabkoi8 },
       -        { "latin1", "ISO 8859-1", Table, (void *)tab8859_1 },
       +        { "koi8-r", "alias for koi8 (MIME)", Table, (void *)tabkoi8 },
       +        { "latin1", "alias for 8859-1", Table, (void *)tab8859_1 },
                { "macrom", "Macintosh Standard Roman character set", Table, (void *)tabmacroman },
       -        { "microsoft", "Windows (CP 1252)", Table, (void *)tabcp1252 },
       -        { "msdos", "IBM PC (CP 437)", Table, (void *)tabcp437 },
       -        { "msdos2", "IBM PC (CP 437 with graphics in C0)", Table, (void *)tabmsdos2 },
       +        { "microsoft", "alias for windows1252", Table, (void *)tabcp1252 },
                { "ms-kanji", "Microsoft, or Shift-JIS", From|Func, 0, (Fnptr)msjis_in },
                { "ms-kanji", "Microsoft, or Shift-JIS", Func, 0, (Fnptr)msjis_out },
       +        { "msdos", "IBM PC (alias for ibm437)", Table, (void *)tabcp437 },
       +        { "msdos2", "IBM PC (ibm437 with graphics in C0)", Table, (void *)tabmsdos2 },
                { "next", "NEXTSTEP character set", Table, (void *)tabnextstep },
                { "ov", "Osnovnoj Variant", Table, (void *)tabov },
       -        { "ps2", "IBM PS/2: (CP 850)", Table, (void *)tabcp850 },
       +        { "ps2", "IBM PS/2: (alias for ibm850)", Table, (void *)tabcp850 },
                { "sf1", "ISO-646: Finnish/Swedish SF-1 variant", Table, (void *)tabsf1 },
                { "sf2", "ISO-646: Finnish/Swedish SF-2 variant (recommended)", Table, (void *)tabsf2 },
       -        { "tis", "Thai+ASCII (TIS 620-1986)", Table, (void *)tabtis620 },
       +        { "tis-620", "Thai+ASCII (TIS 620-1986)", Table, (void *)tabtis620 },
       +        { "tune", "TUNE (Tamil)", From|Func, 0, (Fnptr)tune_in },
       +        { "tune", "TUNE (Tamil)", Func, 0, (Fnptr)tune_out },
                { "ucode", "Russian U-code", Table, (void *)tabucode },
                { "ujis", "EUC-JX: JIS 0208", From|Func, 0, (Fnptr)ujis_in },
                { "ujis", "EUC-JX: JIS 0208", Func, 0, (Fnptr)ujis_out },
                { "unicode", "Unicode 1.1", From|Func, 0, (Fnptr)unicode_in },
                { "unicode", "Unicode 1.1", Func, 0, (Fnptr)unicode_out },
       -        { "utf1", "UTF-1 (ISO 10646 Annex A)", From|Func, 0, (Fnptr)isoutf_in },
       -        { "utf1", "UTF-1 (ISO 10646 Annex A)", Func, 0, (Fnptr)isoutf_out },
       +        { "unicode-be", "Unicode 1.1 big-endian", From|Func, 0, (Fnptr)unicode_in_be },
       +        { "unicode-be", "Unicode 1.1 big-endian", Func, 0, (Fnptr)unicode_out_be },
       +        { "unicode-le", "Unicode 1.1 little-endian", From|Func, 0, (Fnptr)unicode_in_le },
       +        { "unicode-le", "Unicode 1.1 little-endian", Func, 0, (Fnptr)unicode_out_le },
       +        { "us-ascii", "alias for ascii (MIME)", Table, (void *)tabascii },
                { "utf", "FSS-UTF a.k.a. UTF-8", From|Func, 0, (Fnptr)utf_in },
                { "utf", "FSS-UTF a.k.a. UTF-8", Func, 0, (Fnptr)utf_out },
       -        { "utf-l2", "from", From|Func, 0, (Fnptr)utf_in },
       -        { "utf-l2", "to", Func, 0, (Fnptr)utf_out },
       +        { "utf1", "UTF-1 (ISO 10646 Annex A)", From|Func, 0, (Fnptr)isoutf_in },
       +        { "utf1", "UTF-1 (ISO 10646 Annex A)", Func, 0, (Fnptr)isoutf_out },
       +        { "utf-8", "alias for utf (MIME)", From|Func, 0, (Fnptr)utf_in },
       +        { "utf-8", "alias for utf (MIME)", Func, 0, (Fnptr)utf_out },
       +        { "utf-16", "alias for unicode (MIME)", From|Func, 0, (Fnptr)unicode_in },
       +        { "utf-16", "alias for unicode (MIME)", Func, 0, (Fnptr)unicode_out },
       +        { "utf-16be", "alias for unicode-be (MIME)", From|Func, 0, (Fnptr)unicode_in_be },
       +        { "utf-16be", "alias for unicode-be (MIME)", Func, 0, (Fnptr)unicode_out_be },
       +        { "utf-16le", "alias for unicode-le (MIME)", From|Func, 0, (Fnptr)unicode_in_le },
       +        { "utf-16le", "alias for unicode-le (MIME)", Func, 0, (Fnptr)unicode_out_le },
                { "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 },
                { "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 },
       -        { "viscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },
       +        { "vscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },
       +        { "windows-1250", "Windows Code Page 1250 (Central Europe)", Table, (void *)tabcp1250 },
       +        { "windows-1251", "Windows Code Page 1251 (Cyrillic)", Table, (void *)tabcp1251 },
       +        { "windows-1252", "Windows Code Page 1252 (Latin I)", Table, (void *)tabcp1252 },
       +        { "windows-1253", "Windows Code Page 1253 (Greek)", Table, (void *)tabcp1253 },
       +        { "windows-1254", "Windows Code Page 1254 (Turkish)", Table, (void *)tabcp1254 },
       +        { "windows-1255", "Windows Code Page 1255 (Hebrew)", Table, (void *)tabcp1255 },
       +        { "windows-1256", "Windows Code Page 1256 (Arabic)", Table, (void *)tabcp1256 },
       +        { "windows-1257", "Windows Code Page 1257 (Baltic)", Table, (void *)tabcp1257 },
       +        { "windows-1258", "Windows Code Page 1258 (Vietnam)", Table, (void *)tabcp1258 },
                { 0 }
        };
   DIR diff --git a/src/cmd/tcs/utf.c b/src/cmd/tcs/utf.c
       t@@ -45,15 +45,15 @@ utf_in(int fd, long *notused, struct convert *out)
                tot = 0;
                while((n = read(fd, buf+tot, N-tot)) >= 0){
                        tot += n;
       -                for(i=j=0; i<tot; ){
       +                for(i=j=0; i<tot-UTFmax || (n==0 && i<tot); ){
                                c = our_mbtowc(&l, buf+i, tot-i);
       -                        if(c == -2)
       -                                break;
                                if(c == -1){
                                        if(squawk)
                                                EPR "%s: bad UTF sequence near byte %ld in input\n", argv0, ninput+i);
       -                                if(clean)
       +                                if(clean){
       +                                        i++;
                                                continue;
       +                                }
                                        nerrors++;
                                        l = Runeerror;
                                        c = 1;
       t@@ -69,6 +69,7 @@ utf_in(int fd, long *notused, struct convert *out)
                        if(n == 0)
                                break;
                }
       +        OUT(out, runes, 0);
        }
        
        void
       t@@ -100,11 +101,13 @@ isoutf_in(int fd, long *notused, struct convert *out)
                                if(!fullisorune(buf+i, tot-i))
                                        break;
                                c = isochartorune(&runes[j], buf+i);
       -                        if(runes[j] == Runeerror){
       +                        if(runes[j] == Runeerror && c == 1){
                                        if(squawk)
                                                EPR "%s: bad UTF sequence near byte %ld in input\n", argv0, ninput+i);
       -                                if(clean)
       +                                if(clean){
       +                                        i++;
                                                continue;
       +                                }
                                        nerrors++;
                                }
                                j++;
       t@@ -118,6 +121,7 @@ isoutf_in(int fd, long *notused, struct convert *out)
                        if(n == 0)
                                break;
                }
       +        OUT(out, runes, 0);
        }
        
        void
       t@@ -393,19 +397,19 @@ our_mbtowc(unsigned long *p, char *s, unsigned n)
                        return 0;                /* no shift states */
        
                if(n < 1)
       -                goto badlen;
       +                goto bad;
                us = (uchar*)s;
                c0 = us[0];
                if(c0 >= T3) {
                        if(n < 3)
       -                        goto badlen;
       +                        goto bad;
                        c1 = us[1] ^ Tx;
                        c2 = us[2] ^ Tx;
                        if((c1|c2) & T2)
                                goto bad;
                        if(c0 >= T5) {
                                if(n < 5)
       -                                goto badlen;
       +                                goto bad;
                                c3 = us[3] ^ Tx;
                                c4 = us[4] ^ Tx;
                                if((c3|c4) & T2)
       t@@ -413,7 +417,7 @@ our_mbtowc(unsigned long *p, char *s, unsigned n)
                                if(c0 >= T6) {
                                        /* 6 bytes */
                                        if(n < 6)
       -                                        goto badlen;
       +                                        goto bad;
                                        c5 = us[5] ^ Tx;
                                        if(c5 & T2)
                                                goto bad;
       t@@ -437,7 +441,7 @@ our_mbtowc(unsigned long *p, char *s, unsigned n)
                        if(c0 >= T4) {
                                /* 4 bytes */
                                if(n < 4)
       -                                goto badlen;
       +                                goto bad;
                                c3 = us[3] ^ Tx;
                                if(c3 & T2)
                                        goto bad;
       t@@ -460,7 +464,7 @@ our_mbtowc(unsigned long *p, char *s, unsigned n)
                if(c0 >= T2) {
                        /* 2 bytes */
                        if(n < 2)
       -                        goto badlen;
       +                        goto bad;
                        c1 = us[1] ^ Tx;
                        if(c1 & T2)
                                goto bad;
       t@@ -480,6 +484,4 @@ our_mbtowc(unsigned long *p, char *s, unsigned n)
        bad:
                errno = EILSEQ;
                return -1;
       -badlen:
       -        return -2;
        }