lumidify.org/1/git/ltkx/file/unused/text/utf8.c.gph

  URI:

       utf8.c - ltkx - GUI toolkit for X11 (old)
  HTML git clone git://lumidify.org/ltkx.git (fast, but not encrypted)
  HTML git clone https://lumidify.org/ltkx.git (encrypted, but very slow)
  HTML git clone git://4kcetb7mo7hj6grozzybxtotsub5bempzo4lirzc3437amof2c2impyd.onion/ltkx.git (over tor)
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       utf8.c (18333B)
       ---
            1 /*
            2   Basic UTF-8 manipulation routines
            3   by Jeff Bezanson
            4   placed in the public domain Fall 2005
            5 
            6   This code is designed to provide the utilities you need to manipulate
            7   UTF-8 as an internal string encoding. These functions do not perform the
            8   error checking normally needed when handling UTF-8 data, so if you happen
            9   to be from the Unicode Consortium you will want to flay me alive.
           10   I do this because error checking can be performed at the boundaries (I/O),
           11   with these routines reserved for higher performance on data known to be
           12   valid.
           13   A UTF-8 validation routine is included.
           14 */
           15 #include <stdlib.h>
           16 #include <stdio.h>
           17 #include <string.h>
           18 #include <stdarg.h>
           19 #include <stdint.h>
           20 #include <wchar.h>
           21 #include <wctype.h>
           22 
           23 #ifdef WIN32
           24 #include <malloc.h>
           25 #define snprintf _snprintf
           26 #else
           27 #ifndef __FreeBSD__
           28 #include <alloca.h>
           29 #endif /* __FreeBSD__ */
           30 #endif
           31 #include <assert.h>
           32 
           33 #include "utf8.h"
           34 
           /* Correction constants used by the decoders in this file.
              A decoder sums the raw bytes of an (n+1)-byte sequence, shifting the
              accumulator left by 6 between bytes, then subtracts entry n to cancel the
              marker bits carried by the lead and continuation bytes.
              Example: entry 1 == (0xC0 << 6) + 0x80. */
           static const uint32_t offsetsFromUTF8[6] = {
               0x00000000UL,   /* 1-byte sequence */
               0x00003080UL,   /* 2-byte sequence */
               0x000E2080UL,   /* 3-byte sequence */
               0x03C82080UL,   /* 4-byte sequence */
               0xFA082080UL,   /* 5-byte sequence (legacy form) */
               0x82082080UL    /* 6-byte sequence (legacy form) */
           };
           39 
           /* Indexed by the first byte of a sequence: how many bytes follow it.
              One row per high nibble (16 entries each). */
           static const char trailingBytesForUTF8[256] = {
               /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0x60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0x70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
               /* 0xC0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
               /* 0xD0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
               /* 0xE0 */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
               /* 0xF0 */ 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
           };
           50 
           51 /* returns length of next utf-8 sequence */
           52 size_t u8_seqlen(const char *s)
           53 {
           54     return trailingBytesForUTF8[(unsigned int)(unsigned char)s[0]] + 1;
           55 }
           56 
           /* Number of bytes the UTF-8 encoding of ch occupies (1 to 4).
              Returns 0 for values of 0x110000 and above, which are not encoded. */
           size_t u8_charlen(uint32_t ch)
           {
               if (ch >= 0x110000)
                   return 0;
               if (ch >= 0x10000)
                   return 4;
               if (ch >= 0x800)
                   return 3;
               if (ch >= 0x80)
                   return 2;
               return 1;
           }
           71 
           /* Total number of bytes needed to encode the first n characters of wcstr
              as UTF-8 (sum of u8_charlen() over the array; no terminator counted). */
           size_t u8_codingsize(uint32_t *wcstr, size_t n)
           {
               size_t total = 0;

               while (n-- > 0)
                   total += u8_charlen(*wcstr++);
               return total;
           }
           80 
           81 /* conversions without error checking
           82    only works for valid UTF-8, i.e. no 5- or 6-byte sequences
           83    srcsz = source size in bytes
           84    sz = dest size in # of wide characters
           85 
           86    returns # characters converted
           87    if sz == srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space.
           88 */
           89 size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz)
           90 {
           91     uint32_t ch;
           92     const char *src_end = src + srcsz;
           93     size_t nb;
           94     size_t i=0;
           95 
           96     if (sz == 0 || srcsz == 0)
           97         return 0;
           98 
           99     while (i < sz) {
          100         if (!isutf(*src)) {     // invalid sequence
          101             dest[i++] = 0xFFFD;
          102             src++;
          103             if (src >= src_end) break;
          104             continue;
          105         }
          106         nb = trailingBytesForUTF8[(unsigned char)*src];
          107         if (src + nb >= src_end)
          108             break;
          109         ch = 0;
          110         switch (nb) {
          111             /* these fall through deliberately */
          112         case 5: ch += (unsigned char)*src++; ch <<= 6;
          113         case 4: ch += (unsigned char)*src++; ch <<= 6;
          114         case 3: ch += (unsigned char)*src++; ch <<= 6;
          115         case 2: ch += (unsigned char)*src++; ch <<= 6;
          116         case 1: ch += (unsigned char)*src++; ch <<= 6;
          117         case 0: ch += (unsigned char)*src++;
          118         }
          119         ch -= offsetsFromUTF8[nb];
          120         dest[i++] = ch;
          121     }
          122     return i;
          123 }
          124 
          /* Encode an array of 32-bit characters as UTF-8.

             dest  = output buffer
             sz    = size of dest in bytes
             src   = source characters
             srcsz = number of source characters

             returns # bytes stored in dest; no NUL terminator is written.
             Encoding stops at the first character that does not fit completely.
             Values of 0x110000 and above produce no output and are skipped.
             At most 4 bytes are written per source character. */
          size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz)
          {
              size_t used = 0;
              size_t i;

              for (i = 0; i < srcsz; i++) {
                  const uint32_t ch = src[i];
                  /* Remaining space as a count; avoids computing pointers such as
                     dest_end-3 that would lie before the buffer when sz is small. */
                  const size_t room = sz - used;

                  if (ch < 0x80) {
                      if (room < 1)
                          break;
                      dest[used++] = (char)ch;
                  }
                  else if (ch < 0x800) {
                      if (room < 2)
                          break;
                      dest[used++] = (char)((ch >> 6) | 0xC0);
                      dest[used++] = (char)((ch & 0x3F) | 0x80);
                  }
                  else if (ch < 0x10000) {
                      if (room < 3)
                          break;
                      dest[used++] = (char)((ch >> 12) | 0xE0);
                      dest[used++] = (char)(((ch >> 6) & 0x3F) | 0x80);
                      dest[used++] = (char)((ch & 0x3F) | 0x80);
                  }
                  else if (ch < 0x110000) {
                      if (room < 4)
                          break;
                      dest[used++] = (char)((ch >> 18) | 0xF0);
                      dest[used++] = (char)(((ch >> 12) & 0x3F) | 0x80);
                      dest[used++] = (char)(((ch >> 6) & 0x3F) | 0x80);
                      dest[used++] = (char)((ch & 0x3F) | 0x80);
                  }
              }
              return used;
          }
          170 
          /* Encode the single character ch into dest and return the number of bytes
             written (1 to 4). Nothing is written and 0 is returned when ch is
             0x110000 or above. dest is not NUL-terminated and its size is not
             checked, so the caller must provide room for up to 4 bytes. */
          size_t u8_wc_toutf8(char *dest, uint32_t ch)
          {
              if (ch >= 0x110000)
                  return 0;

              if (ch >= 0x10000) {
                  dest[0] = (ch>>18) | 0xF0;
                  dest[1] = ((ch>>12) & 0x3F) | 0x80;
                  dest[2] = ((ch>>6) & 0x3F) | 0x80;
                  dest[3] = (ch & 0x3F) | 0x80;
                  return 4;
              }

              if (ch >= 0x800) {
                  dest[0] = (ch>>12) | 0xE0;
                  dest[1] = ((ch>>6) & 0x3F) | 0x80;
                  dest[2] = (ch & 0x3F) | 0x80;
                  return 3;
              }

              if (ch >= 0x80) {
                  dest[0] = (ch>>6) | 0xC0;
                  dest[1] = (ch & 0x3F) | 0x80;
                  return 2;
              }

              dest[0] = (char)ch;
              return 1;
          }
          197 
          /* charnum => byte offset
             Walks charnum characters forward from s[0] and returns the index reached.
             Performs no validation and does not stop at a NUL byte. */
          size_t u8_offset(const char *s, size_t charnum)
          {
              size_t pos = 0;

              for (; charnum > 0; charnum--) {
                  const int multibyte = (s[pos] & 0x80) != 0;

                  pos++;
                  if (!multibyte)
                      continue;
                  /* the 2nd byte is skipped unseen; the 3rd and 4th are skipped
                     only while isutf() says a new sequence has not started */
                  pos++;
                  if (!isutf(s[pos])) {
                      pos++;
                      if (!isutf(s[pos]))
                          pos++;
                  }
              }
              return pos;
          }
          211 
          /* byte offset => charnum
             Counts the characters that start before byte index `offset`.
             Performs no validation and does not stop at a NUL byte. */
          size_t u8_charnum(const char *s, size_t offset)
          {
              size_t pos = 0;
              size_t n = 0;

              for (; pos < offset; n++) {
                  const int multibyte = (s[pos] & 0x80) != 0;

                  pos++;
                  if (!multibyte)
                      continue;
                  /* same skipping scheme as u8_offset(): 2nd byte unseen, then up to
                     two more while isutf() reports no new sequence */
                  pos++;
                  if (!isutf(s[pos])) {
                      pos++;
                      if (!isutf(s[pos]))
                          pos++;
                  }
              }
              return n;
          }
          225 
          /* number of characters in NUL-terminated string
             Runs of ASCII bytes are counted in bulk; every multi-byte sequence
             counts as one character. The input is assumed to be valid UTF-8. */
          size_t u8_strlen(const char *s)
          {
              size_t count = 0;
              size_t i = 0;

              for (;;) {
                  const size_t run_start = i;

                  /* The high bit is tested explicitly. A plain `s[i] > 0` is only an
                     ASCII test where char is signed; where char is unsigned it is
                     true for every non-NUL byte and bytes would be counted instead
                     of characters. */
                  while (s[i] != '\0' && !(s[i] & 0x80))
                      i++;
                  count += i - run_start;

                  if (s[i++] == '\0')
                      break;
                  /* skip the rest of the multi-byte sequence: 2nd byte unseen, then
                     up to two more while isutf() reports no new sequence */
                  i++;
                  if (!isutf(s[i])) {
                      i++;
                      if (!isutf(s[i]))
                          i++;
                  }
                  count++;
              }
              return count;
          }
          243 
          244 int wcwidth(wchar_t c);
          245 
          246 size_t u8_strwidth(const char *s)
          247 {
          248     uint32_t ch;
          249     size_t nb, tot=0;
          250     int w;
          251     signed char sc;
          252 
          253     while ((sc = (signed char)*s) != 0) {
          254         if (sc >= 0) {
          255             s++;
          256             if (sc) tot++;
          257         }
          258         else {
          259             if (!isutf(sc)) { tot++; s++; continue; }
          260             nb = trailingBytesForUTF8[(unsigned char)sc];
          261             ch = 0;
          262             switch (nb) {
          263                 /* these fall through deliberately */
          264             case 5: ch += (unsigned char)*s++; ch <<= 6;
          265             case 4: ch += (unsigned char)*s++; ch <<= 6;
          266             case 3: ch += (unsigned char)*s++; ch <<= 6;
          267             case 2: ch += (unsigned char)*s++; ch <<= 6;
          268             case 1: ch += (unsigned char)*s++; ch <<= 6;
          269             case 0: ch += (unsigned char)*s++;
          270             }
          271             ch -= offsetsFromUTF8[nb];
          272             w = wcwidth(ch);  // might return -1
          273             if (w > 0) tot += w;
          274         }
          275     }
          276     return tot;
          277 }
          278 
          279 /* reads the next utf-8 sequence out of a string, updating an index */
          280 uint32_t u8_nextchar(const char *s, size_t *i)
          281 {
          282     uint32_t ch = 0;
          283     size_t sz = 0;
          284 
          285     do {
          286         ch <<= 6;
          287         ch += (unsigned char)s[(*i)];
          288         sz++;
          289     } while (s[*i] && (++(*i)) && !isutf(s[*i]));
          290     ch -= offsetsFromUTF8[sz-1];
          291 
          292     return ch;
          293 }
          294 
          295 /* next character without NUL character terminator */
          296 uint32_t u8_nextmemchar(const char *s, size_t *i)
          297 {
          298     uint32_t ch = 0;
          299     size_t sz = 0;
          300     do {
          301         ch <<= 6;
          302         ch += (unsigned char)s[(*i)++];
          303         sz++;
          304     } while (!isutf(s[*i]));
          305     ch -= offsetsFromUTF8[sz-1];
          306 
          307     return ch;
          308 }
          309 
          /* Move *i forward to the next character: step up to three bytes, stopping
             as soon as isutf() accepts the byte reached; otherwise take a fourth
             step without looking. */
          void u8_inc(const char *s, size_t *i)
          {
              int step;

              for (step = 0; step < 3; step++) {
                  ++(*i);
                  if (isutf(s[*i]))
                      return;
              }
              ++(*i);
          }
          314 
          /* Move *i back to the previous character: step back up to three bytes,
             stopping as soon as isutf() accepts the byte reached; otherwise take a
             fourth step without looking. *i is not checked against 0. */
          void u8_dec(const char *s, size_t *i)
          {
              int step;

              for (step = 0; step < 3; step++) {
                  --(*i);
                  if (isutf(s[*i]))
                      return;
              }
              --(*i);
          }
          319 
          /* 1 if c is one of '0'..'7', otherwise 0. */
          int octal_digit(char c)
          {
              if (c < '0')
                  return 0;
              if (c > '7')
                  return 0;
              return 1;
          }
          324 
          /* 1 if c is a hexadecimal digit (0-9, A-F, a-f), otherwise 0. */
          int hex_digit(char c)
          {
              if (c >= '0' && c <= '9')
                  return 1;
              if (c >= 'A' && c <= 'F')
                  return 1;
              if (c >= 'a' && c <= 'f')
                  return 1;
              return 0;
          }
          331 
          /* Map the letter of a single-letter escape (n, t, r, e, b, f, v, a) to the
             control character it stands for; any other character is returned
             unchanged. */
          char read_escape_control_char(char c)
          {
              switch (c) {
              case 'n': return '\n';
              case 't': return '\t';
              case 'r': return '\r';
              case 'e': return 033; /* '\e' */
              case 'b': return '\b';
              case 'f': return '\f';
              case 'v': return '\v';
              case 'a': return '\a';
              default:  return c;
              }
          }
          352 
          /* Parse one escape sequence; str points to the character after a backslash.

             str  = input, at least ssz readable characters
             ssz  = number of characters available (must be > 0)
             dest = receives the decoded value

             Recognized forms: up to 3 octal digits, x + up to 2 hex digits,
             u + up to 4 hex digits, U + up to 8 hex digits. Any other character goes
             through read_escape_control_char().

             returns number of input characters processed, 0 if error (x, u or U not
             followed by a hex digit; *dest is left untouched in that case). */
          size_t u8_read_escape_sequence(const char *str, size_t ssz, uint32_t *dest)
          {
              uint32_t ch;
              char digs[10];      /* at most 8 digits plus the terminator */
              int dno = 0;
              int ndig = 0;
              size_t i = 1;
              char c0;

              /* check the size before touching str[0] */
              assert(ssz > 0);
              c0 = str[0];

              if (octal_digit(c0)) {
                  i = 0;
                  do {
                      digs[dno++] = str[i++];
                  } while (i<ssz && octal_digit(str[i]) && dno<3);
                  digs[dno] = '\0';
                  ch = (uint32_t)strtoul(digs, NULL, 8);
              }
              else if (c0 == 'x' || c0 == 'u' || c0 == 'U') {
                  ndig = (c0 == 'x') ? 2 : (c0 == 'u') ? 4 : 8;
                  while (i<ssz && hex_digit(str[i]) && dno<ndig) {
                      digs[dno++] = str[i++];
                  }
                  if (dno == 0) return 0;
                  digs[dno] = '\0';
                  /* strtoul, not strtol: eight hex digits can exceed LONG_MAX when
                     long is 32 bits, and strtol would then return LONG_MAX instead
                     of the value that was written */
                  ch = (uint32_t)strtoul(digs, NULL, 16);
              }
              else {
                  ch = (uint32_t)read_escape_control_char(c0);
              }
              *dest = ch;

              return i;
          }
          389 
          /* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8
             example: u8_unescape(mybuf, 256, "hello\\u220e")
             note the double backslash is needed if called on a C string literal

             buf = output buffer, sz = its size in bytes, src = NUL-terminated input.
             returns the number of bytes stored; a NUL terminator is added only when
             there is room left for it (returned count < sz).

             Bytes that are not part of an escape are copied unchanged, so UTF-8
             already present in src passes through. A backslash at the very end of
             src, or one starting a malformed escape (\x, \u or \U without a hex
             digit), is copied literally. */
          size_t u8_unescape(char *buf, size_t sz, const char *src)
          {
              size_t c = 0;
              char temp[4];

              while (*src && c < sz) {
                  uint32_t ch = 0;
                  size_t used = 0;
                  size_t amt;

                  /* Only parse when a character follows the backslash; otherwise the
                     parser would consume the terminator and src would run off the
                     end of the string. src is NUL-terminated and digit scanning
                     stops at the NUL, so 1000 is just a nominal upper bound. */
                  if (src[0] == '\\' && src[1] != '\0')
                      used = u8_read_escape_sequence(src + 1, 1000, &ch);

                  if (used == 0) {
                      /* ordinary byte, trailing backslash, or malformed escape */
                      buf[c++] = *src++;
                      continue;
                  }

                  src += 1 + used;
                  amt = u8_wc_toutf8(temp, ch);
                  if (amt > sz-c)
                      break;
                  memcpy(&buf[c], temp, amt);
                  c += amt;
              }
              if (c < sz)
                  buf[c] = '\0';
              return c;
          }
          419 
          /* Copy the first two characters of src into buf, terminate buf with NUL
             and return 2. buf must have room for 3 bytes. */
          static int buf_put2c(char *buf, const char *src)
          {
              int k;

              for (k = 0; k < 2; k++)
                  buf[k] = src[k];
              buf[k] = '\0';
              return k;
          }
          427 
          /* Write the escaped, NUL-terminated text form of ch into buf (sz bytes,
             sz must be > 2) and return its length without the terminator.
             Named control characters and the backslash use two-character escapes;
             other values below 32 and 0x7f use \xNN; values above 0xFFFF use
             \UNNNNNNNN; other values from 0x80 up use \uNNNN; everything else is
             copied as a single character.
             The numeric forms return whatever snprintf() returns. */
          int u8_escape_wchar(char *buf, size_t sz, uint32_t ch)
          {
              assert(sz > 2);

              switch (ch) {
              case '\n': return buf_put2c(buf, "\\n");
              case '\t': return buf_put2c(buf, "\\t");
              case '\r': return buf_put2c(buf, "\\r");
              case 033:  return buf_put2c(buf, "\\e");   /* '\e' */
              case '\b': return buf_put2c(buf, "\\b");
              case '\f': return buf_put2c(buf, "\\f");
              case '\v': return buf_put2c(buf, "\\v");
              case '\a': return buf_put2c(buf, "\\a");
              case '\\': return buf_put2c(buf, "\\\\");
              default:   break;
              }

              if (ch < 32 || ch == 0x7f)
                  return snprintf(buf, sz, "\\x%.2hhx", (unsigned char)ch);
              if (ch > 0xFFFF)
                  return snprintf(buf, sz, "\\U%.8x", (uint32_t)ch);
              if (ch >= 0x80)
                  return snprintf(buf, sz, "\\u%.4hx", (unsigned short)ch);

              buf[0] = (char)ch;
              buf[1] = '\0';
              return 1;
          }
          460 
          /* Escape src[*pi .. end) into buf (sz bytes, sz must be > 11).

             escape_quotes = non-zero to turn '"' into \"
             ascii         = non-zero to escape every character that reaches
                             u8_escape_wchar(), not only those iswprint() rejects

             Backslashes are always doubled. Output stops when the input range is
             used up or fewer than 11 bytes of buf remain (room for the longest
             escape, \UNNNNNNNN, plus the terminator).
             On return *pi holds the index of the first unprocessed input byte.
             returns the number of bytes written, including the NUL terminator. */
          size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
                           int escape_quotes, int ascii)
          {
              char *const start = buf;
              char *const limit = start + sz-11;
              size_t pos = *pi;

              assert(sz > 11);

              while (pos < end && buf < limit) {
                  const char cur = src[pos];
                  size_t next;
                  uint32_t ch;

                  if (escape_quotes && cur == '"') {
                      buf += buf_put2c(buf, "\\\"");
                      pos++;
                      continue;
                  }
                  if (cur == '\\') {
                      buf += buf_put2c(buf, "\\\\");
                      pos++;
                      continue;
                  }

                  /* decode one character to decide how to emit it */
                  next = pos;
                  ch = u8_nextmemchar(src, &next);
                  if (ascii || !iswprint((wint_t)ch)) {
                      buf += u8_escape_wchar(buf, sz - (buf-start), ch);
                  }
                  else {
                      /* printable: copy the bytes of the sequence unchanged */
                      while (pos < next)
                          *buf++ = src[pos++];
                  }
                  pos = next;
              }
              *buf++ = '\0';
              *pi = pos;
              return (buf-start);
          }
          498 
          /* Find the first occurrence of ch in the NUL-terminated string s.
             Returns a pointer to the first byte of the match, or NULL.
             *charn receives the number of characters decoded before the match
             (or the total number of characters when nothing is found). */
          char *u8_strchr(const char *s, uint32_t ch, size_t *charn)
          {
              size_t pos = 0;

              *charn = 0;
              while (s[pos] != '\0') {
                  const size_t start = pos;

                  if (u8_nextchar(s, &pos) == ch) {
                      /* it's const for us, but not necessarily the caller */
                      return (char*)&s[start];
                  }
                  ++(*charn);
              }
              return NULL;
          }
          516 
          517 char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn)
          518 {
          519     size_t i = 0, lasti=0;
          520     uint32_t c;
          521     int csz;
          522 
          523     *charn = 0;
          524     while (i < sz) {
          525         c = csz = 0;
          526         do {
          527             c <<= 6;
          528             c += (unsigned char)s[i++];
          529             csz++;
          530         } while (i < sz && !isutf(s[i]));
          531         c -= offsetsFromUTF8[csz-1];
          532 
          533         if (c == ch) {
          534             return (char*)&s[lasti];
          535         }
          536         lasti = i;
          537         (*charn)++;
          538     }
          539     return NULL;
          540 }
          541 
          /* Find the last occurrence of ch within the first sz bytes of s by walking
             backwards one character at a time.
             Returns a pointer to the first byte of the match, or NULL. */
          char *u8_memrchr(const char *s, uint32_t ch, size_t sz)
          {
              size_t pos, probe, prev;

              if (sz == 0) return NULL;

              /* back up from the last byte to the start of the last character */
              pos = sz-1;
              while (pos != 0 && !isutf(s[pos]))
                  pos--;

              for (;;) {
                  probe = pos;
                  if (u8_nextmemchar(s, &probe) == ch)
                      return (char*)&s[pos];
                  if (pos == 0)
                      return NULL;
                  prev = pos;
                  u8_dec(s, &pos);
                  if (pos > prev)     /* index wrapped below zero */
                      return NULL;
              }
          }
          566 
          /* Returns 1 when the encoding part of a locale name (the text between the
             first '.' and the next '@', '+', ',' or the end) is exactly "UTF-8" or
             "utf8"; returns 0 otherwise, including for a NULL locale or a name whose
             '@', '+' or ',' comes before any '.'.
             (this code based on libutf8) */
          int u8_is_locale_utf8(const char *locale)
          {
              const char *p;
              const char *encoding;
              size_t n;

              if (locale == NULL) return 0;

              /* find the '.' that introduces the encoding, if there is one */
              p = locale;
              while (*p != '\0' && *p != '.' && *p != '@' && *p != '+' && *p != ',')
                  p++;
              if (*p != '.')
                  return 0;

              /* measure the encoding name */
              encoding = ++p;
              while (*p != '\0' && *p != '@' && *p != '+' && *p != ',')
                  p++;
              n = (size_t)(p - encoding);

              if (n == 5 && strncmp(encoding, "UTF-8", 5) == 0)
                  return 1; /* it's UTF-8 */
              if (n == 4 && strncmp(encoding, "utf8", 4) == 0)
                  return 1; /* it's UTF-8 */
              return 0;
          }
          587 
          /* Format fmt/ap into UTF-8, convert the result with u8_toucs() and print it
             with printf("%ls").
             returns the number of characters produced by the conversion, or 0 when
             formatting or a memory allocation fails.
             NOTE: the converted uint32_t array is handed to printf as wchar_t*, which
             assumes wchar_t is 32 bits wide. */
          size_t u8_vprintf(const char *fmt, va_list ap)
          {
              char stackbuf[512];
              char *buf = stackbuf;
              uint32_t *wcs;
              va_list ap2;
              size_t nc;
              int cnt;

              /* a va_list may be consumed only once; keep a copy for the retry */
              va_copy(ap2, ap);
              cnt = vsnprintf(stackbuf, sizeof stackbuf, fmt, ap);
              if (cnt < 0) {
                  va_end(ap2);
                  return 0;
              }
              if ((size_t)cnt >= sizeof stackbuf) {
                  /* output was truncated: format again into an exact-size buffer */
                  buf = malloc((size_t)cnt + 1);
                  if (buf == NULL) {
                      va_end(ap2);
                      return 0;
                  }
                  vsnprintf(buf, (size_t)cnt + 1, fmt, ap2);
              }
              va_end(ap2);

              /* heap, not alloca: the size depends on the formatted length */
              wcs = malloc(((size_t)cnt + 1) * sizeof *wcs);
              if (wcs == NULL) {
                  if (buf != stackbuf) free(buf);
                  return 0;
              }
              nc = u8_toucs(wcs, (size_t)cnt+1, buf, (size_t)cnt);
              wcs[nc] = 0;
              printf("%ls", (wchar_t*)wcs);

              free(wcs);
              if (buf != stackbuf) free(buf);
              return nc;
          }
          611 
          /* printf-style front end for u8_vprintf(); returns its result. */
          size_t u8_printf(const char *fmt, ...)
          {
              va_list ap;
              size_t printed;

              va_start(ap, fmt);
              printed = u8_vprintf(fmt, ap);
              va_end(ap);

              return printed;
          }
          624 
          /* based on the valid_utf8 routine from the PCRE library by Philip Hazel

             length is in bytes, since without knowing whether the string is valid
             it's hard to know how many characters there are!

             returns 0 if str is not valid, 1 if it is pure ASCII, 2 if it is valid
             and contains at least one multi-byte sequence.
             Checks performed: lead/continuation bit patterns, overlong encodings,
             and that every sequence lies completely inside the first `length` bytes.
             The legacy 5- and 6-byte forms are still accepted, as before. */
          int u8_isvalid(const char *str, size_t length)
          {
              const unsigned char *p = (const unsigned char*)str;
              const unsigned char *pend = p + length;
              int ret = 1; /* ASCII */

              while (p < pend) {
                  const unsigned char c = *p++;
                  size_t ab;  /* number of bytes that must follow c */

                  if (c < 128)
                      continue;
                  ret = 2; /* non-ASCII UTF-8 */
                  if ((c & 0xc0) != 0xc0)
                      return 0;   /* continuation byte where a lead byte is needed */

                  if (c < 0xe0)      ab = 1;
                  else if (c < 0xf0) ab = 2;
                  else if (c < 0xf8) ab = 3;
                  else if (c < 0xfc) ab = 4;
                  else               ab = 5;

                  /* The whole sequence must fit in what is left of the buffer.
                     Comparing against the remaining byte count (not the original
                     length) keeps a sequence cut off at the end from being read
                     past pend. */
                  if ((size_t)(pend - p) < ab)
                      return 0;

                  /* Check top bits in the second byte */
                  if ((*p & 0xc0) != 0x80)
                      return 0;

                  /* Check for overlong sequences for each different length */
                  switch (ab) {
                  case 1: /* xx00 000x */
                      if ((c & 0x3e) == 0) return 0;
                      break;
                  case 2: /* 1110 0000, xx0x xxxx */
                      if (c == 0xe0 && (*p & 0x20) == 0) return 0;
                      break;
                  case 3: /* 1111 0000, xx00 xxxx */
                      if (c == 0xf0 && (*p & 0x30) == 0) return 0;
                      break;
                  case 4: /* 1111 1000, xx00 0xxx */
                      if (c == 0xf8 && (*p & 0x38) == 0) return 0;
                      break;
                  case 5: /* leading 0xfe or 0xff, then 1111 1100, xx00 00xx */
                      if (c == 0xfe || c == 0xff ||
                          (c == 0xfc && (*p & 0x3c) == 0)) return 0;
                      break;
                  }
                  p++;

                  /* Check for valid bytes after the 2nd, if any; all must start 10 */
                  while (--ab > 0) {
                      if ((*p++ & 0xc0) != 0x80) return 0;
                  }
              }

              return ret;
          }
          691 
          /* Write the characters of src (len bytes of UTF-8) into dest in reverse
             order, keeping the bytes of each multi-byte sequence in their original
             order, and NUL-terminate dest.

             dest = output buffer with room for len+1 bytes; must not overlap src
             src  = input bytes
             len  = number of input bytes

             returns 0 on success. Returns 1 when a byte that cannot start a sequence
             is found where one should start, or when the last sequence is cut short
             by len; dest is then only partially filled. */
          int u8_reverse(char *dest, char * src, size_t len)
          {
              size_t si = 0, di = len;

              dest[di] = '\0';
              while (si < len) {
                  const unsigned char c = (unsigned char)src[si];
                  size_t n;   /* bytes in the sequence starting at src[si] */

                  if ((c & 0x80) == 0) {
                      n = 1;
                  }
                  else {
                      switch (c>>4) {
                      case 0xC:
                      case 0xD:
                          n = 2;
                          break;
                      case 0xE:
                          n = 3;
                          break;
                      case 0xF:
                          n = 4;
                          break;
                      default:
                          return 1;
                      }
                  }
                  /* di always equals len - si, so this check also keeps di from
                     wrapping below zero on a truncated final sequence */
                  if (n > len - si)
                      return 1;
                  di -= n;
                  /* memcpy instead of int16_t/int32_t pointer casts: the bytes may
                     be unaligned and are not objects of those types */
                  memcpy(&dest[di], &src[si], n);
                  si += n;
              }
              return 0;
          }