From 4e7a48bf15138034f68ac1c73eb133dc04d7c8ad Mon Sep 17 00:00:00 2001
From: Lasse Collin
Date: Tue, 17 Dec 2024 14:59:37 +0200
Subject: [PATCH] Windows: Use UTF-8 locale when active code page is UTF-8

XZ Utils 5.6.3 set the active code page to UTF-8 to fix CVE-2024-47611.
This wasn't paired with UCRT-specific setlocale(LC_ALL, ".UTF8"), thus
non-ASCII characters from translations became mojibake.

Fixes: 46ee0061629fb075d61d83839e14dd193337af59
(cherry picked from commit 0d0b574cc45045d6150d397776340c068df59e2a)
---
 src/common/tuklib_gettext.h | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/common/tuklib_gettext.h b/src/common/tuklib_gettext.h
index 3ef5cb72..4021c98f 100644
--- a/src/common/tuklib_gettext.h
+++ b/src/common/tuklib_gettext.h
@@ -15,6 +15,34 @@
 #include "tuklib_common.h"
 #include <locale.h>
 
+#if defined(_WIN32) && !defined(__CYGWIN__)
+#	define WIN32_LEAN_AND_MEAN
+#	include <windows.h>
+	// To use UTF-8 code page on Windows 10 version 1903 and later, the
+	// active code page has to be set to UTF-8 in the application manifest
+	// and UCRT-specific setlocale(LC_ALL, ".UTF8") must be called. The
+	// manifest makes argv[] use UTF-8 (which setlocale() cannot affect)
+	// and the special setlocale() call makes mbrtowc() and such functions
+	// use UTF-8. (It's weird why regular setlocale(LC_ALL, "") doesn't
+	// use the code page from the application manifest.)
+	//
+	// The two things have quite a bit of overlap though. For example,
+	// both affect the code page used in the file system APIs. Thus,
+	// if argv[] isn't in UTF-8, using setlocale() to set UTF-8 will
+	// break non-ASCII filenames that have been passed as command line
+	// arguments. Thus, it's best to set an UTF-8 locale only when
+	// the active code page is UTF-8.
+	//
+	// NOTE: Only UCRT supports the UTF-8 locale string, thus this
+	// will fail with MSVCRT if the active code page is UTF-8. That
+	// shouldn't be too bad because UTF-8 doesn't work properly with
+	// MSVCRT anyway.
+#	define tuklib_gettext_setlocale() \
+		setlocale(LC_ALL, GetACP() == CP_UTF8 ? ".UTF8" : "")
+#else
+#	define tuklib_gettext_setlocale() setlocale(LC_ALL, "")
+#endif
+
 #ifndef TUKLIB_GETTEXT
 #	ifdef ENABLE_NLS
 #		define TUKLIB_GETTEXT 1
@@ -27,14 +55,14 @@
 #	include <libintl.h>
 #	define tuklib_gettext_init(package, localedir) \
 		do { \
-			setlocale(LC_ALL, ""); \
+			tuklib_gettext_setlocale(); \
 			bindtextdomain(package, localedir); \
 			textdomain(package); \
 		} while (0)
 #	define _(msgid) gettext(msgid)
 #else
 #	define tuklib_gettext_init(package, localedir) \
-		setlocale(LC_ALL, "")
+		tuklib_gettext_setlocale()
 #	define _(msgid) (msgid)
 #	define ngettext(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2))
 #endif