URI: 
       sync drw.{c,h} from dmenu - dwm - dynamic window manager
  HTML git clone git://git.suckless.org/dwm
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit 8933ebcf50024f4378a78e556b1ac08091197206
   DIR parent 5687f4696472ba6029bbba18e293e3e8b9e154ea
  HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat,  5 Oct 2024 13:01:49 +0200
       
       sync drw.{c,h} from dmenu
       
       - drw: minor improvement to the nomatches cache
       - overhaul utf8decoding and render invalid utf8 sequences as U+FFFD.
       
       Thanks NRK for these improvements!
       
       Diffstat:
         M drw.c                               |     114 +++++++++++++++----------------
         M dwm.c                               |       1 -
         M util.h                              |       1 +
       
       3 files changed, 56 insertions(+), 60 deletions(-)
       ---
   DIR diff --git a/drw.c b/drw.c
       @@ -9,54 +9,40 @@
        #include "util.h"
        
        #define UTF_INVALID 0xFFFD
       -#define UTF_SIZ     4
        
       -static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80,    0, 0xC0, 0xE0, 0xF0};
       -static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8};
       -static const long utfmin[UTF_SIZ + 1] = {       0,    0,  0x80,  0x800,  0x10000};
       -static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
       -
       -static long
       -utf8decodebyte(const char c, size_t *i)
       -{
       -        for (*i = 0; *i < (UTF_SIZ + 1); ++(*i))
       -                if (((unsigned char)c & utfmask[*i]) == utfbyte[*i])
       -                        return (unsigned char)c & ~utfmask[*i];
       -        return 0;
       -}
       -
       -static size_t
       -utf8validate(long *u, size_t i)
       -{
       -        if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
       -                *u = UTF_INVALID;
       -        for (i = 1; *u > utfmax[i]; ++i)
       -                ;
       -        return i;
       -}
       -
       -static size_t
       -utf8decode(const char *c, long *u, size_t clen)
       +static int
       +utf8decode(const char *s_in, long *u, int *err)
        {
       -        size_t i, j, len, type;
       -        long udecoded;
       -
       +        static const unsigned char lens[] = {
       +                /* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       +                /* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0,  /* invalid */
       +                /* 110XX */ 2, 2, 2, 2,
       +                /* 1110X */ 3, 3,
       +                /* 11110 */ 4,
       +                /* 11111 */ 0,  /* invalid */
       +        };
       +        static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
       +        static const unsigned int overlong[] = { 0x0, 0x80, 0x0800, 0x10000 };
       +
       +        const unsigned char *s = (const unsigned char *)s_in;
       +        int len = lens[*s >> 3];
                *u = UTF_INVALID;
       -        if (!clen)
       -                return 0;
       -        udecoded = utf8decodebyte(c[0], &len);
       -        if (!BETWEEN(len, 1, UTF_SIZ))
       +        *err = 1;
       +        if (len == 0)
                        return 1;
       -        for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
       -                udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type);
       -                if (type)
       -                        return j;
       +
       +        long cp = s[0] & leading_mask[len - 1];
       +        for (int i = 1; i < len; ++i) {
       +                if (s[i] == '\0' || (s[i] & 0xC0) != 0x80)
       +                        return i;
       +                cp = (cp << 6) | (s[i] & 0x3F);
                }
       -        if (j < len)
       -                return 0;
       -        *u = udecoded;
       -        utf8validate(u, len);
       +        /* out of range, surrogate, overlong encoding */
       +        if (cp > 0x10FFFF || (cp >> 11) == 0x1B || cp < overlong[len - 1])
       +                return len;
        
       +        *err = 0;
       +        *u = cp;
                return len;
        }
        
       @@ -238,11 +224,11 @@ drw_rect(Drw *drw, int x, int y, unsigned int w, unsigned int h, int filled, int
        int
        drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lpad, const char *text, int invert)
        {
       -        int i, ty, ellipsis_x = 0;
       -        unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len;
       +        int ty, ellipsis_x = 0;
       +        unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1;
                XftDraw *d = NULL;
                Fnt *usedfont, *curfont, *nextfont;
       -        int utf8strlen, utf8charlen, render = x || y || w || h;
       +        int utf8strlen, utf8charlen, utf8err, render = x || y || w || h;
                long utf8codepoint = 0;
                const char *utf8str;
                FcCharSet *fccharset;
       @@ -251,9 +237,8 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                XftResult result;
                int charexists = 0, overflow = 0;
                /* keep track of a couple codepoints for which we have no match. */
       -        enum { nomatches_len = 64 };
       -        static struct { long codepoint[nomatches_len]; unsigned int idx; } nomatches;
       -        static unsigned int ellipsis_width = 0;
       +        static unsigned int nomatches[128], ellipsis_width, invalid_width;
       +        static const char invalid[] = "�";
        
                if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts)
                        return 0;
       @@ -273,12 +258,14 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                usedfont = drw->fonts;
                if (!ellipsis_width && render)
                        ellipsis_width = drw_fontset_getwidth(drw, "...");
       +        if (!invalid_width && render)
       +                invalid_width = drw_fontset_getwidth(drw, invalid);
                while (1) {
       -                ew = ellipsis_len = utf8strlen = 0;
       +                ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0;
                        utf8str = text;
                        nextfont = NULL;
                        while (*text) {
       -                        utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ);
       +                        utf8charlen = utf8decode(text, &utf8codepoint, &utf8err);
                                for (curfont = drw->fonts; curfont; curfont = curfont->next) {
                                        charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint);
                                        if (charexists) {
       @@ -300,9 +287,9 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                                                        else
                                                                utf8strlen = ellipsis_len;
                                                } else if (curfont == usedfont) {
       -                                                utf8strlen += utf8charlen;
                                                        text += utf8charlen;
       -                                                ew += tmpw;
       +                                                utf8strlen += utf8err ? 0 : utf8charlen;
       +                                                ew += utf8err ? 0 : tmpw;
                                                } else {
                                                        nextfont = curfont;
                                                }
       @@ -310,7 +297,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                                        }
                                }
        
       -                        if (overflow || !charexists || nextfont)
       +                        if (overflow || !charexists || nextfont || utf8err)
                                        break;
                                else
                                        charexists = 0;
       @@ -325,6 +312,12 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                                x += ew;
                                w -= ew;
                        }
       +                if (utf8err && (!render || invalid_width < w)) {
       +                        if (render)
       +                                drw_text(drw, x, y, w, h, 0, invalid, invert);
       +                        x += invalid_width;
       +                        w -= invalid_width;
       +                }
                        if (render && overflow)
                                drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert);
        
       @@ -338,11 +331,14 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                                 * character must be drawn. */
                                charexists = 1;
        
       -                        for (i = 0; i < nomatches_len; ++i) {
       -                                /* avoid calling XftFontMatch if we know we won't find a match */
       -                                if (utf8codepoint == nomatches.codepoint[i])
       -                                        goto no_match;
       -                        }
       +                        hash = (unsigned int)utf8codepoint;
       +                        hash = ((hash >> 16) ^ hash) * 0x21F0AAAD;
       +                        hash = ((hash >> 15) ^ hash) * 0xD35A2D97;
       +                        h0 = ((hash >> 15) ^ hash) % LENGTH(nomatches);
       +                        h1 = (hash >> 17) % LENGTH(nomatches);
       +                        /* avoid expensive XftFontMatch call when we know we won't find a match */
       +                        if (nomatches[h0] == utf8codepoint || nomatches[h1] == utf8codepoint)
       +                                goto no_match;
        
                                fccharset = FcCharSetCreate();
                                FcCharSetAddChar(fccharset, utf8codepoint);
       @@ -371,7 +367,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                                                curfont->next = usedfont;
                                        } else {
                                                xfont_free(usedfont);
       -                                        nomatches.codepoint[++nomatches.idx % nomatches_len] = utf8codepoint;
       +                                        nomatches[nomatches[h0] ? h1 : h0] = utf8codepoint;
        no_match:
                                                usedfont = drw->fonts;
                                        }
   DIR diff --git a/dwm.c b/dwm.c
       @@ -50,7 +50,6 @@
        #define INTERSECT(x,y,w,h,m)    (MAX(0, MIN((x)+(w),(m)->wx+(m)->ww) - MAX((x),(m)->wx)) \
                                       * MAX(0, MIN((y)+(h),(m)->wy+(m)->wh) - MAX((y),(m)->wy)))
        #define ISVISIBLE(C)            ((C->tags & C->mon->tagset[C->mon->seltags]))
       -#define LENGTH(X)               (sizeof X / sizeof X[0])
        #define MOUSEMASK               (BUTTONMASK|PointerMotionMask)
        #define WIDTH(X)                ((X)->w + 2 * (X)->bw)
        #define HEIGHT(X)               ((X)->h + 2 * (X)->bw)
   DIR diff --git a/util.h b/util.h
       @@ -3,6 +3,7 @@
        #define MAX(A, B)               ((A) > (B) ? (A) : (B))
        #define MIN(A, B)               ((A) < (B) ? (A) : (B))
        #define BETWEEN(X, A, B)        ((A) <= (X) && (X) <= (B))
       +#define LENGTH(X)               (sizeof (X) / sizeof (X)[0])
        
        void die(const char *fmt, ...);
        void *ecalloc(size_t nmemb, size_t size);