From 51f038f8cbd5d8a95954c05bfcbbc32f2a313615 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Mon, 13 Jan 2025 08:44:58 +0200 Subject: [PATCH] liblzma: memcmplen.h: Use 8-byte method on 64-bit unaligned archs Previously it was enabled only on x86-64 and ARM64 when support for unaligned access was also detected or manually enabled at build time. In the default build configuration, the 8-byte method is now enabled also on 64-bit RISC-V and 64-bit PowerPC (both endiannesses). It was reported that on big endian POWER9, encoding time may be reduced by 12-13 %. This change only affects builds with GCC and Clang because the code uses __builtin_ctzll or __builtin_clzll. Thanks to Marcus Comstedt for testing on POWER9. --- src/liblzma/common/memcmplen.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/liblzma/common/memcmplen.h b/src/liblzma/common/memcmplen.h index 86b5d6f3..82e90854 100644 --- a/src/liblzma/common/memcmplen.h +++ b/src/liblzma/common/memcmplen.h @@ -58,8 +58,7 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ && (((TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) \ - && (defined(__x86_64__) \ - || defined(__aarch64__))) \ + && SIZE_MAX == UINT64_MAX) \ || (defined(__INTEL_COMPILER) && defined(__x86_64__)) \ || (defined(__INTEL_COMPILER) && defined(_M_X64)) \ || (defined(_MSC_VER) && (defined(_M_X64) \