mirror of https://git.tukaani.org/xz.git
tuklib_mbstr_width: Change the behavior when wcwidth() is not available
If wcwidth() isn't available (Windows), previously it was assumed that one byte == one column in the terminal. Now it is assumed that one multibyte character == one column. This works better with UTF-8. Languages that only use single-width characters without any combining characters should work correctly with this. In xz, none of po/*.po contain combining characters and only ko.po, zh_CN.po, and zh_TW.po contain fullwidth characters. Thus, "only" those three translations in xz are broken on Windows with the UTF-8 code page. Broken means that column headings in xz -lvv and (only in the master branch) strings in --long-help are misaligned, so it's not a huge problem. I don't know if those three languages displayed perfectly before the UTF-8 change because I hadn't tested translations with native Windows builds before. Fixes: 46ee006162
(cherry picked from commit b797c44c42)
This commit is contained in:
parent
4ff609adb0
commit
4e0ebbabe4
|
@ -12,7 +12,7 @@
|
|||
#include "tuklib_mbstr.h"
|
||||
#include <string.h>
|
||||
|
||||
#if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
|
||||
#ifdef HAVE_MBRTOWC
|
||||
# include <wchar.h>
|
||||
#endif
|
||||
|
||||
|
@ -24,7 +24,7 @@ tuklib_mbstr_width(const char *str, size_t *bytes)
|
|||
if (bytes != NULL)
|
||||
*bytes = len;
|
||||
|
||||
#if !(defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH))
|
||||
#ifndef HAVE_MBRTOWC
|
||||
// In single-byte mode, the width of the string is the same
|
||||
// as its length.
|
||||
return len;
|
||||
|
@ -46,11 +46,20 @@ tuklib_mbstr_width(const char *str, size_t *bytes)
|
|||
|
||||
i += ret;
|
||||
|
||||
#ifdef HAVE_WCWIDTH
|
||||
const int wc_width = wcwidth(wc);
|
||||
if (wc_width < 0)
|
||||
return (size_t)-1;
|
||||
|
||||
width += (size_t)wc_width;
|
||||
#else
|
||||
// Without wcwidth() (like in a native Windows build),
|
||||
// assume that one multibyte char == one column. With
|
||||
// UTF-8, this is less bad than one byte == one column.
|
||||
// This way quite a few languages will be handled correctly
|
||||
// in practice; CJK chars will be very wrong though.
|
||||
++width;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Require that the string ends in the initial shift state.
|
||||
|
|
Loading…
Reference in New Issue