mirror of
				https://git.tukaani.org/xz.git
				synced 2025-10-31 05:22:55 +00:00 
			
		
		
		
	tuklib_mbstr_width: Change the behavior when wcwidth() is not available
If wcwidth() isn't available (Windows), previously it was assumed that one byte == one column in the terminal. Now it is assumed that one multibyte character == one column. This works better with UTF-8. Languages that only use single-width characters without any combining characters should work correctly with this.

In xz, none of po/*.po contain combining characters and only ko.po, zh_CN.po, and zh_TW.po contain fullwidth characters. Thus, "only" those three translations in xz are broken on Windows with the UTF-8 code page. Broken means that column headings in xz -lvv and (only in the master branch) strings in --long-help are misaligned, so it's not a huge problem. I don't know if those three languages displayed perfectly before the UTF-8 change because I hadn't tested translations with native Windows builds before.

Fixes: 46ee0061629fb075d61d83839e14dd193337af59

(cherry picked from commit b797c44c42ea54fe1c52722a2fca0c9618575598)
This commit is contained in:
		
							parent
							
								
									4ff609adb0
								
							
						
					
					
						commit
						4e0ebbabe4
					
				| @ -12,7 +12,7 @@ | ||||
| #include "tuklib_mbstr.h" | ||||
| #include <string.h> | ||||
| 
 | ||||
| #if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) | ||||
| #ifdef HAVE_MBRTOWC | ||||
| #	include <wchar.h> | ||||
| #endif | ||||
| 
 | ||||
| @ -24,7 +24,7 @@ tuklib_mbstr_width(const char *str, size_t *bytes) | ||||
| 	if (bytes != NULL) | ||||
| 		*bytes = len; | ||||
| 
 | ||||
| #if !(defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)) | ||||
| #ifndef HAVE_MBRTOWC | ||||
| 	// In single-byte mode, the width of the string is the same
 | ||||
| 	// as its length.
 | ||||
| 	return len; | ||||
| @ -46,11 +46,20 @@ tuklib_mbstr_width(const char *str, size_t *bytes) | ||||
| 
 | ||||
| 		i += ret; | ||||
| 
 | ||||
| #ifdef HAVE_WCWIDTH | ||||
| 		const int wc_width = wcwidth(wc); | ||||
| 		if (wc_width < 0) | ||||
| 			return (size_t)-1; | ||||
| 
 | ||||
| 		width += (size_t)wc_width; | ||||
| #else | ||||
| 		// Without wcwidth() (like in a native Windows build),
 | ||||
| 		// assume that one multibyte char == one column. With
 | ||||
| 		// UTF-8, this is less bad than one byte == one column.
 | ||||
| 		// This way quite a few languages will be handled correctly
 | ||||
| 		// in practice; CJK chars will be very wrong though.
 | ||||
| 		++width; | ||||
| #endif | ||||
| 	} | ||||
| 
 | ||||
| 	// Require that the string ends in the initial shift state.
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user