Windows: Use UTF-8 locale when active code page is UTF-8

XZ Utils 5.6.3 set the active code page to UTF-8 to fix CVE-2024-47611.
This wasn't paired with the UCRT-specific setlocale(LC_ALL, ".UTF8") call,
so non-ASCII characters from translations became mojibake.

Fixes: 46ee006162
This commit is contained in:
Lasse Collin 2024-12-17 14:59:37 +02:00
parent 20dfca8171
commit 0d0b574cc4
No known key found for this signature in database
GPG Key ID: 38EE757D69184620
1 changed files with 30 additions and 2 deletions

View File

@ -15,6 +15,34 @@
#include "tuklib_common.h"
#include <locale.h>
#if defined(_WIN32) && !defined(__CYGWIN__)
// Only define WIN32_LEAN_AND_MEAN if nothing has defined it yet. The build
// system or an earlier header may already have done so, possibly with a
// different replacement list (for example -DWIN32_LEAN_AND_MEAN=1), and
// redefining it here would then be an incompatible macro redefinition.
# ifndef WIN32_LEAN_AND_MEAN
#  define WIN32_LEAN_AND_MEAN
# endif
# include <windows.h>
// To use UTF-8 code page on Windows 10 version 1903 and later, the
// active code page has to be set to UTF-8 in the application manifest
// and UCRT-specific setlocale(LC_ALL, ".UTF8") must be called. The
// manifest makes argv[] use UTF-8 (which setlocale() cannot affect)
// and the special setlocale() call makes mbrtowc() and such functions
// use UTF-8. (It's odd that a regular setlocale(LC_ALL, "") doesn't
// use the code page from the application manifest.)
//
// The two things have quite a bit of overlap though. For example,
// both affect the code page used in the file system APIs. Thus,
// if argv[] isn't in UTF-8, using setlocale() to set UTF-8 will
// break non-ASCII filenames that have been passed as command line
// arguments. Thus, it's best to set a UTF-8 locale only when
// the active code page is UTF-8.
//
// NOTE: Only UCRT supports the UTF-8 locale string, thus this
// will fail with MSVCRT if the active code page is UTF-8. That
// shouldn't be too bad because UTF-8 doesn't work properly with
// MSVCRT anyway.
//
// tuklib_gettext_setlocale() expands to a single setlocale() call, so
// its value is whatever setlocale() returns (NULL if the request could
// not be honored).
# define tuklib_gettext_setlocale() \
	setlocale(LC_ALL, GetACP() == CP_UTF8 ? ".UTF8" : "")
#else
// Everywhere else the locale is taken from the environment.
# define tuklib_gettext_setlocale() setlocale(LC_ALL, "")
#endif
#ifndef TUKLIB_GETTEXT
# ifdef ENABLE_NLS
# define TUKLIB_GETTEXT 1
@ -27,14 +55,14 @@
# include <libintl.h>
// tuklib_gettext_init(package, localedir): sets up the locale and then
// binds the message catalog by calling bindtextdomain(package, localedir)
// and textdomain(package). The body is wrapped in do { ... } while (0) so
// that the macro can be used like a single statement.
//
// NOTE(review): this text looks like a diff rendered without +/- markers.
// The direct setlocale(LC_ALL, "") call is presumably the removed line and
// tuklib_gettext_setlocale() its replacement -- confirm against the
// actual header before relying on both calls being present.
# define tuklib_gettext_init(package, localedir) \
do { \
setlocale(LC_ALL, ""); \
tuklib_gettext_setlocale(); \
bindtextdomain(package, localedir); \
textdomain(package); \
} while (0)
// Shorthand for translating msgid with gettext().
# define _(msgid) gettext(msgid)
#else
// Variant of tuklib_gettext_init() for the #else branch: it only sets the
// locale and does not call bindtextdomain() or textdomain(), so the
// package and localedir arguments are unused.
//
// NOTE(review): this text looks like a diff rendered without +/- markers.
// The setlocale(LC_ALL, "") line is presumably the removed macro body and
// tuklib_gettext_setlocale() the line that replaces it -- confirm against
// the actual header.
# define tuklib_gettext_init(package, localedir) \
setlocale(LC_ALL, "")
tuklib_gettext_setlocale()
// In this branch _() evaluates to msgid itself, unchanged.
# define _(msgid) (msgid)
// Selects msgid1 when n == 1 and msgid2 otherwise; no catalog lookup is done.
# define ngettext(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2))
#endif