mirror of https://git.tukaani.org/xz.git
tuklib_mbstr_width: Change the behavior when wcwidth() is not available
If wcwidth() isn't available (Windows), previously it was assumed that one byte == one column in the terminal. Now it is assumed that one multibyte character == one column. This works better with UTF-8. Languages that only use single-width characters without any combining characters should work correctly with this. In xz, none of po/*.po contain combining characters and only ko.po, zh_CN.po, and zh_TW.po contain fullwidth characters. Thus, "only" those three translations in xz are broken on Windows with the UTF-8 code page. Broken means that column headings in xz -lvv and (only in the master branch) strings in --long-help are misaligned, so it's not a huge problem. I don't know if those three languages displayed perfectly before the UTF-8 change because I hadn't tested translations with native Windows builds before. Fixes: 46ee006162
(cherry picked from commit b797c44c42)
This commit is contained in:
parent
4ff609adb0
commit
4e0ebbabe4
|
@ -12,7 +12,7 @@
|
||||||
#include "tuklib_mbstr.h"
|
#include "tuklib_mbstr.h"
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
|
#ifdef HAVE_MBRTOWC
|
||||||
# include <wchar.h>
|
# include <wchar.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ tuklib_mbstr_width(const char *str, size_t *bytes)
|
||||||
if (bytes != NULL)
|
if (bytes != NULL)
|
||||||
*bytes = len;
|
*bytes = len;
|
||||||
|
|
||||||
#if !(defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH))
|
#ifndef HAVE_MBRTOWC
|
||||||
// In single-byte mode, the width of the string is the same
|
// In single-byte mode, the width of the string is the same
|
||||||
// as its length.
|
// as its length.
|
||||||
return len;
|
return len;
|
||||||
|
@ -46,11 +46,20 @@ tuklib_mbstr_width(const char *str, size_t *bytes)
|
||||||
|
|
||||||
i += ret;
|
i += ret;
|
||||||
|
|
||||||
|
#ifdef HAVE_WCWIDTH
|
||||||
const int wc_width = wcwidth(wc);
|
const int wc_width = wcwidth(wc);
|
||||||
if (wc_width < 0)
|
if (wc_width < 0)
|
||||||
return (size_t)-1;
|
return (size_t)-1;
|
||||||
|
|
||||||
width += (size_t)wc_width;
|
width += (size_t)wc_width;
|
||||||
|
#else
|
||||||
|
// Without wcwidth() (like in a native Windows build),
|
||||||
|
// assume that one multibyte char == one column. With
|
||||||
|
// UTF-8, this is less bad than one byte == one column.
|
||||||
|
// This way quite a few languages will be handled correctly
|
||||||
|
// in practice; CJK chars will be very wrong though.
|
||||||
|
++width;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Require that the string ends in the initial shift state.
|
// Require that the string ends in the initial shift state.
|
||||||
|
|
Loading…
Reference in New Issue