utf8pad: fix byte-seek issue with negative width codepoints in the range >= 127 - stagit-gopher - A git gopher frontend. (mirror) HTML git clone git://bitreich.org/stagit-gopher/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/stagit-gopher/ DIR Log DIR Files DIR Refs DIR Tags DIR README DIR LICENSE --- DIR commit 05a08e8ab50a8da5b2896c3f5887801d059f48dd DIR parent a9c90b585f158f98dd0997d1509e83f85dd87498 HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org> Date: Sat, 9 Jan 2021 16:19:18 +0100 utf8pad: fix byte-seek issue with negative width codepoints in the range >= 127 For example, for "\xef\xbf\xb7" (codepoint 0xfff7), wcwidth(wc) returns -1. The seek to the next byte was incorrect, even though the codepoint itself was valid (mbtowc). Diffstat: M stagit-gopher-index.c | 7 +++---- M stagit-gopher.c | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) --- DIR diff --git a/stagit-gopher-index.c b/stagit-gopher-index.c @@ -38,19 +38,18 @@ utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad) slen = strlen(s); for (i = 0; i < slen; i += inc) { - inc = 1; + inc = 1; /* next byte */ if ((unsigned char)s[i] < 32) continue; rl = mbtowc(&wc, &s[i], slen - i < 4 ? 
slen - i : 4); + inc = rl; if (rl < 0) { mbtowc(NULL, NULL, 0); /* reset state */ - inc = 1; /* next byte */ + inc = 1; /* invalid, seek next byte */ w = 1; /* replacement char is one width */ } else if ((w = wcwidth(wc)) == -1) { continue; - } else { - inc = rl; } if (col + w > len || (col + w == len && s[i + inc])) {