add more useful functions to utf.h - iomenu - interactive terminal-based selection menu HTML git clone git://bitreich.org/iomenu git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/iomenu DIR Log DIR Files DIR Refs DIR Tags DIR README DIR LICENSE --- DIR commit 39fe39ddae414f337914be1aaa03915679635415 DIR parent 9adba189e74c6b6752d04f8e6954177b35a99bbd HTML Author: Josuah Demangeon <mail@josuah.net> Date: Tue, 16 Jan 2018 22:34:54 +0100 add more useful functions to utf.h Diffstat: M iomenu.c | 2 +- A main.c | 18 ++++++++++++++++++ M utf8.c | 78 ++++++++++++++++++++++--------- M utf8.h | 7 ++++--- 4 files changed, 80 insertions(+), 25 deletions(-) --- DIR diff --git a/iomenu.c b/iomenu.c @@ -350,7 +350,7 @@ format(char *str, int cols) col++; } str++; - } else if (utf8_to_rune(&rune, str) && utf8_is_print(rune)) { + } else if (utf8_torune(&rune, str) && utf8_isprint(rune)) { int i = utf8_len(str); while (i--) *fmt++ = *str++; DIR diff --git a/main.c b/main.c @@ -0,0 +1,18 @@ +#include <stdio.h> +#include "utf8.h" + +int +main(void) +{ + int n = 100; + char s[] = "浪漫的夢想", *cut; + + if ((cut = utf8_col(s, n)) == NULL) { + printf("the whole string fit\n"); + return 0; + } + printf("%zd\n", cut - s); + *cut = '\0'; + printf("%s\n", s); + return 0; +} DIR diff --git a/utf8.c b/utf8.c @@ -26,32 +26,36 @@ #include <stddef.h> #include <stdlib.h> #include <string.h> +#include <wchar.h> /* only used for wcwidth() */ #include "utf8.h" /* * Return the number of bytes in rune for the `n` next char in `s`, - * or 0 if ti is misencoded. + * or 0 if is misencoded or if it is '\0'. */ size_t utf8_len(char *s) { unsigned char *sp = (unsigned char *) s; - int i, len = (*sp < 0x80) ? 1 : /* 0xxxxxxx < 10000000 */ - (*sp < 0xc0) ? 0 : /* 10xxxxxx < 11000000 */ - (*sp < 0xe0) ? 2 : /* 110xxxxx < 11100000 */ - (*sp < 0xf0) ? 3 : /* 1110xxxx < 11110000 */ - (*sp < 0xf8) ? 4 : /* 11110xxx < 11111000 */ - (*sp < 0xfc) ? 5 : /* 111110xx < 11111100 */ - (*sp < 0xfe) ? 6 : /* 1111110x < 11111110 */ - (*sp < 0xff) ? 7 : /* 11111110 < 11111111 */ - 0; - if ((size_t) len > strlen(s)) return 0; - - /* check continuation bytes */ - for (sp++, i = 1; i < len; i++, sp++) + int i, len; + + len = (*sp == 0x0) ? 0 : /* 00000000 */ + (*sp < 0x80) ? 1 : /* 0xxxxxxx < 10000000 */ + (*sp < 0xc0) ? 0 : /* 10xxxxxx < 11000000 */ + (*sp < 0xe0) ? 2 : /* 110xxxxx < 11100000 */ + (*sp < 0xf0) ? 3 : /* 1110xxxx < 11110000 */ + (*sp < 0xf8) ? 4 : /* 11110xxx < 11111000 */ + (*sp < 0xfc) ? 5 : /* 111110xx < 11111100 */ + (*sp < 0xfe) ? 6 : /* 1111110x < 11111110 */ + (*sp < 0xff) ? 7 : /* 11111110 < 11111111 */ + 0; + + /* check continuation bytes and '\0' */ + for (sp++, i = 1; i < len; i++, sp++) { if ((*sp & 0xc0) != 0x80) /* 10xxxxxx & 11000000 */ return 0; + } return len; } @@ -61,11 +65,11 @@ utf8_len(char *s) * 0 if rune is too long. */ size_t -utf8_rune_len(long r) +utf8_runelen(long r) { - return (r <= 0x0000007f) ? 1 : (r <= 0x000007ff) ? 2 : - (r <= 0x0000ffff) ? 3 : (r <= 0x001fffff) ? 4 : - (r <= 0x03ffffff) ? 5 : (r <= 0x7fffffff) ? 6 : 0; + return (r <= 0x0000007f) ? 1 : (r <= 0x000007ff) ? 2 : + (r <= 0x0000ffff) ? 3 : (r <= 0x001fffff) ? 4 : + (r <= 0x03ffffff) ? 5 : (r <= 0x7fffffff) ? 6 : 0; } /* @@ -74,7 +78,7 @@ utf8_rune_len(long r) * Return the number of bytes read or 0 if the string is misencoded. */ size_t -utf8_to_rune(long *r, char *s) +utf8_torune(long *r, char *s) { char mask[] = { 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 }; size_t i, len = utf8_len(s); @@ -90,17 +94,49 @@ utf8_to_rune(long *r, char *s) *r = (*r << 6) | (*s++ & 0x3f); /* 10xxxxxx */ /* overlong sequences */ - if (utf8_rune_len(*r) != len) + if (utf8_runelen(*r) != len) return 0; return len; } /* + * + */ +utf8_tostr(char *s, long r) +{ + +} + +/* * Return 1 if the rune is a printable character, and 0 otherwise. */ int -utf8_is_print(long r) +utf8_isprint(long r) { return (0x1f < r && r != 0x7f && r < 0x80) || 0x9f < r; } + +/* + * Return a pointer to the first byte of a character of `s' that would be + * rendered at the `col'-th column in a monospaced terminal, or NULL if the + * whole string fit. + */ +char * +utf8_col(char *s, size_t col) +{ + size_t n; + long r; + char *pos; + + for (n = 0; n < col; n += wcwidth(r)) { + pos = s; + if (*s == '\0') + return NULL; + s += utf8_torune(&r, s); + utf8_toutf(); + printf("%zd:'%s' ", n, s); + } + + return pos; +} DIR diff --git a/utf8.h b/utf8.h @@ -1,6 +1,7 @@ size_t utf8_len(char *); size_t rune_len(long); -size_t utf8_to_rune(long *, char *); -int utf8_is_unicode(long); +size_t utf8_torune(long *, char *); +int utf8_isunicode(long); int utf8_check(char *); -int utf8_is_print(long); +int utf8_isprint(long); +char *utf8_col();