working prototype of libtext - iomenu - interactive terminal-based selection menu HTML git clone git://bitreich.org/iomenu git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/iomenu DIR Log DIR Files DIR Refs DIR Tags DIR README DIR LICENSE --- DIR commit 353e9eb3f5f4df50ed5802ee14c593c29eba706e DIR parent 61b9b7eeca080291752b9813705c17208e83505a HTML Author: Josuah Demangeonā ā µ <mail@josuah.net> Date: Sun, 2 Apr 2017 14:56:16 +0200 working prototype of libtext Diffstat: M text.c | 162 ++++++++++++++++--------------- M text.h | 15 ++++++++++----- 2 files changed, 93 insertions(+), 84 deletions(-) --- DIR diff --git a/text.c b/text.c @@ -2,8 +2,8 @@ * Functions handling UTF-8 srings: * * stdin -> buffer -> stdout - * char[] -> long[] -> char[] * UTF-8 -> rune -> UTF-8 + * char[] -> long[] -> char[] */ @@ -15,15 +15,16 @@ /* - * Return the number of bytes in rune for the `len` next char in `s`, - * or 0 if `utf` is misencoded. + * Return the number of bytes in rune for the `n` next char in `s`, + * or 0 if ti is misencoded. * - * Thanks to Connor Lane Smith for some ideas. + * Thanks to Connor Lane Smith for the idea of using 0x??. */ int -utflen(char *s, int n) { +utflen(char *s, int n) +{ int len = 1; - int contiunation_bytes = + int continuation_bytes = (s[0] & 0x80) == 0x00 ? 0 : /* 0xxxxxxx */ (s[0] & 0xc0) == 0x80 ? 1 : /* 10xxxxxx */ (s[0] & 0xe0) == 0xc0 ? 2 : /* 110xxxxx */ @@ -34,11 +35,12 @@ utflen(char *s, int n) { (s[0] & 0xff) == 0xfe ? 7 : /* 11111110 */ 8; /* 11111111 */ - if (contiunation_bytes > 6 || contiunation_bytes > n) + if (continuation_bytes > 6 || continuation_bytes > n) return 0; /* check if continuation bytes are 10xxxxxx and increment `len` */ - switch (contiunation_bytes) { /* FALLTHROUGH */ + switch (continuation_bytes) { /* FALLTHROUGH */ + case 7: if ((s[6] & 0xc0) != 0x80) return 0; else len++; case 6: if ((s[5] & 0xc0) != 0x80) return 0; else len++; case 5: if ((s[4] & 0xc0) != 0x80) return 0; else len++; case 4: if ((s[3] & 0xc0) != 0x80) return 0; else len++; @@ -51,10 +53,11 @@ utflen(char *s, int n) { /* - * return the number of bytes required to display `rune` + * Return the number of bytes required to display `rune` */ int -runelen(long r) { +runelen(long r) +{ if (r <= 0x0000007f) return 1; if (r <= 0x000007ff) return 2; if (r <= 0x0000ffff) return 3; @@ -66,12 +69,13 @@ runelen(long r) { /* - * return the firsts `len` bytes in the sring poined by `utf` to a rune. - * if the `utf` is misencoded, the first char is returned as a - * negative value. + * Sets `r` to a rune corresponding to the firsts `n` bytes of `s` + * and return the number of bytes read. + * if `s` is misencoded, the rune is stored as a negative value. */ int -utftorune(long *r, char *s, int n) { +utftorune(long *r, char *s, int n) +{ int len = utflen(s, n); /* first byte */ @@ -100,36 +104,37 @@ utftorune(long *r, char *s, int n) { /* - * return the next rune in the `len` next `utf`, or 0 if - * `utf` is misencoded. + * Encode the rune `r` in utf-8 in `s`, null-terminated, and return + * the number of bytes written, 0 if `r` is invalid. */ int -runetoutf(char *s, long r) { +runetoutf(char *s, long r) +{ switch (runelen(r)) { case 1: - s[0] = r; /* 0xxxxxxx */ + s[0] = r; /* 0xxxxxxx */ s[1] = '\0'; return 1; case 2: - s[0] = 0xc0 | (0x3f & (r >> 6)); /* 110xxxxx */ - s[1] = 0x80 | (0x3f & (r)); /* 10xxxxxx */ + s[0] = 0xc0 | (0x1f & (r >> 6)); /* 110xxxxx */ + s[1] = 0x80 | (0x3f & (r)); /* 10xxxxxx */ s[2] = '\0'; return 2; case 3: - s[0] = 0xe0 | (0x3f & (r >> 12)); /* 1110xxxx */ - s[1] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */ - s[2] = 0x80 | (0x3f & (r)); /* 10xxxxxx */ + s[0] = 0xe0 | (0x0f & (r >> 12)); /* 1110xxxx */ + s[1] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */ + s[2] = 0x80 | (0x3f & (r)); /* 10xxxxxx */ s[3] = '\0'; return 3; case 4: - s[0] = 0xf0 | (0x3f & (r >> 6)); /* 11110xxx */ - s[1] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */ - s[2] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */ - s[3] = 0x80 | (0x3f & (r)); /* 10xxxxxx */ + s[0] = 0xf0 | (0x07 & (r >> 18)); /* 11110xxx */ + s[1] = 0x80 | (0x3f & (r >> 12)); /* 10xxxxxx */ + s[2] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */ + s[3] = 0x80 | (0x3f & (r)); /* 10xxxxxx */ s[4] = '\0'; return 4; case 5: - s[0] = 0xf8 | (0x3f & (r >> 24)); /* 111110xx */ + s[0] = 0xf8 | (0x03 & (r >> 24)); /* 111110xx */ s[1] = 0x80 | (0x3f & (r >> 18)); /* 10xxxxxx */ s[2] = 0x80 | (0x3f & (r >> 12)); /* 10xxxxxx */ s[3] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */ @@ -137,7 +142,7 @@ runetoutf(char *s, long r) { s[5] = '\0'; return 5; case 6: - s[0] = 0xfc | (0x3f & (r >> 30)); /* 1111110x */ + s[0] = 0xfc | (0x01 & (r >> 30)); /* 1111110x */ s[1] = 0x80 | (0x3f & (r >> 24)); /* 10xxxxxx */ s[2] = 0x80 | (0x3f & (r >> 18)); /* 10xxxxxx */ s[3] = 0x80 | (0x3f & (r >> 12)); /* 10xxxxxx */ @@ -152,31 +157,56 @@ runetoutf(char *s, long r) { /* + * Read a newly allocated string from `f` up to the first '\n' + * character or the end of the fifle. It is stored as a rune array, + * and `r` is set to point to it. + */ +int +getutf(long **r, FILE *f) +{ + int slen, rlen = 0, c, size = BUFSIZ; + char *s; + + if (!(s = malloc(size))) return -1; + for (slen = 0; (c = fgetc(f)) != EOF && (c != '\n'); slen++) { + s[slen] = c; + + if (slen >= size) + if (!(s = realloc(s, ++size))) return -1; + } + + if (!(*r = malloc(size * sizeof (long)))) return -1; + for (int i = 0; i < slen; rlen++) + i += utftorune(*r + rlen, s + i, slen - i); + + free(s); + return rlen; +} + + +/* * Fill `s` with a printable representation of `r` and return the - * width of the character + * width of the character. The tab characters are converted to + * spaces as if it was at the column `col`. */ int runetoprint(char *s, long r, int col) { - /* ASCII control characters and invalid characters */ - if (r == '\t') { + /* invalid */ + if (r < 0) { + sprintf(s, "[%02x]", (unsigned char) -r); + + } else if (r == '\t') { int i; for (i = 0; i < (col + 1) % 8 - 1; i++) s[i] = ' '; - s[i] = '\0'; - - } else if (r < ' ' || r == 0x7f) { - sprintf(s, "[%02x]", (char) r); - - /* non-breaking space */ - } else if (r == 0xa0) { - sprintf(s, "[ ]"); + s[i] = '\0'; s[0] = '|'; - /* soft hyphen */ - } else if (r == 0xad) { - sprintf(s, "[-]"); + /* ascii control */ + } else if (r == 0x7f || r < ' ') { + sprintf(s, "[%02lx]", r); - /* valid UTF-8 but not printable Unicode code points */ + /* utf-8 but not printable */ } else if ( /* unicode control */ (0x80 <= r && r < 0xa0) || @@ -209,47 +239,21 @@ runetoprint(char *s, long r, int col) } -/* - * Read a newly allocated string `s` from `file` up to the first '\n' - * character or the end of the file. - */ -int -getutf(char **s, FILE *file) -{ - int i; int c; - - *s = malloc(BUFSIZ); - - for (i = 0; (c = fgetc(file)) != EOF && (c != '\n'); i++) { - (*s)[i] = c; - - if ((size_t) i + 16 >= sizeof(s)) - *s = realloc(*s, sizeof(s) + BUFSIZ); - } - - return i; -} - - int main() { - char s[7]; - long r; - - for (int i = 0; i < 9000; i++) { - runetoutf(s, i); - utftorune(&r, s, 7); - runetoutf(s, r); - utftorune(&r, s, 7); - runetoprint(s, r, 0); + char s[BUFSIZ]; + long *r; - printf("%5X: ", r); - printf("'%s'\t", s); + for (int len; (len = getutf(&r, stdin)) >= 0 && !feof(stdin); free(r)) { + for (int i = 0; i < len; i++) { + runetoprint(s, r[i], 0); + fputs(s, stdout); + } - if (i % 8 == 0) - puts(""); + putchar('\n'); } + free(r); return 0; } DIR diff --git a/text.h b/text.h @@ -1,6 +1,11 @@ -typedef int Rune; - +/* rune / utf length */ int utflen(char *, int); -int runelen(Rune); -int utftorune(Rune *, char *, int); -int runetoutf(char *, Rune); +int runelen(long); + +/* decode / encode */ +int utftorune(long *, char *, int); +int runetoutf(char *, long); + +/* stdin / stdout */ +int getutf(long **, FILE *); +int runetoprint(char *, long, int);