commit 59936c7d972587a47d61161279bb8e8abc0b02f3 from: NRK via: Hiltjo Posthuma date: Sun Jul 14 09:42:58 2024 UTC render invalid utf8 sequences as U+FFFD previously drw_text would do the width calculations as if invalid utf8 sequences were replaced with U+FFFD but would pass the invalid utf8 sequence to xft to render where xft would just cut it off at the first invalid byte. this change makes invalid utf8 render as U+FFFD and avoids sending invalid sequences to xft. the following can be used to check the behavior before and after the patch: $ printf "0\xef1234567\ntest" | dmenu Ref: https://lists.suckless.org/dev/2407/35646.html commit - 51e32d49b56c86cd288c64fccf6cd765547781b9 commit + 59936c7d972587a47d61161279bb8e8abc0b02f3 blob - eb71da7f08e52722b11c70faf1a486bed2e59bb1 blob + f151ae5e2780f77aaad53ab9d7255efe9e981311 --- drw.c +++ drw.c @@ -237,7 +237,8 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsig XftResult result; int charexists = 0, overflow = 0; /* keep track of a couple codepoints for which we have no match. */ - static unsigned int nomatches[128], ellipsis_width; + static unsigned int nomatches[128], ellipsis_width, invalid_width; + static const char invalid[] = "�"; if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts) return 0; @@ -257,6 +258,10 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsig usedfont = drw->fonts; if (!ellipsis_width && render) ellipsis_width = drw_fontset_getwidth(drw, "..."); + if (!invalid_width) { + invalid_width = -1; /* stop infinite recursion */ + invalid_width = drw_fontset_getwidth(drw, invalid); + } while (1) { ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0; utf8str = text; @@ -284,9 +289,9 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsig else utf8strlen = ellipsis_len; } else if (curfont == usedfont) { - utf8strlen += utf8charlen; text += utf8charlen; - ew += tmpw; + utf8strlen += utf8err ? 0 : utf8charlen; + ew += utf8err ? 0 : tmpw; } else { nextfont = curfont; } @@ -294,7 +299,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsig } } - if (overflow || !charexists || nextfont) + if (overflow || !charexists || nextfont || utf8err) break; else charexists = 0; @@ -309,6 +314,12 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsig x += ew; w -= ew; } + if (utf8err && (!render || invalid_width < w)) { + if (render) + drw_text(drw, x, y, w, h, 0, invalid, invert); + x += invalid_width; + w -= invalid_width; + } if (render && overflow) drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert);