From fae37ad2affd8fe8871f4ff93d5cab5ec14d5e58 Mon Sep 17 00:00:00 2001
From: Lasse Collin <lasse.collin@tukaani.org>
Date: Wed, 5 Oct 2022 14:26:00 +0300
Subject: [PATCH] tuklib_integer: Add 64-bit endianness-converting reads and
 writes.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also update the comment in liblzma's memcmplen.h.

Thanks to Michał Górny for the original patch for the reads.
---
 m4/tuklib_integer.m4           |  8 +++---
 src/common/tuklib_integer.h    | 46 ++++++++++++++++++++++++++++++++--
 src/liblzma/common/memcmplen.h |  9 +++----
 3 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/m4/tuklib_integer.m4 b/m4/tuklib_integer.m4
index e9741ef6..9e104729 100644
--- a/m4/tuklib_integer.m4
+++ b/m4/tuklib_integer.m4
@@ -64,8 +64,8 @@ main(void)
 AC_MSG_CHECKING([if unaligned memory access should be used])
 AC_ARG_ENABLE([unaligned-access], AS_HELP_STRING([--enable-unaligned-access],
 		[Enable if the system supports *fast* unaligned memory access
-		with 16-bit and 32-bit integers. By default, this is enabled
-		only on x86, x86_64, big endian PowerPC,
+		with 16-bit, 32-bit, and 64-bit integers. By default,
+		this is enabled only on x86, x86_64, big endian PowerPC,
 		and some ARM systems.]),
 	[], [enable_unaligned_access=auto])
 if test "x$enable_unaligned_access" = xauto ; then
@@ -93,8 +93,8 @@ int main(void) { return 0; }
 fi
 if test "x$enable_unaligned_access" = xyes ; then
 	AC_DEFINE([TUKLIB_FAST_UNALIGNED_ACCESS], [1], [Define to 1 if
-		the system supports fast unaligned access to 16-bit and
-		32-bit integers.])
+		the system supports fast unaligned access to 16-bit,
+		32-bit, and 64-bit integers.])
 	AC_MSG_RESULT([yes])
 else
 	AC_MSG_RESULT([no])
diff --git a/src/common/tuklib_integer.h b/src/common/tuklib_integer.h
index 6f44a7a0..b58ef68d 100644
--- a/src/common/tuklib_integer.h
+++ b/src/common/tuklib_integer.h
@@ -17,8 +17,8 @@
 /// - Byte swapping: bswapXX(num)
 /// - Byte order conversions to/from native (byteswaps if Y isn't
 ///   the native endianness): convXXYe(num)
-/// - Unaligned reads (16/32-bit only): readXXYe(ptr)
-/// - Unaligned writes (16/32-bit only): writeXXYe(ptr, num)
+/// - Unaligned reads: readXXYe(ptr)
+/// - Unaligned writes: writeXXYe(ptr, num)
 /// - Aligned reads: aligned_readXXYe(ptr)
 /// - Aligned writes: aligned_writeXXYe(ptr, num)
 ///
@@ -343,6 +343,46 @@ read32le(const uint8_t *buf)
 }
 
 
+static inline uint64_t
+read64be(const uint8_t *buf)
+{
+#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
+	uint64_t num = read64ne(buf);
+	return conv64be(num);
+#else
+	uint64_t num = (uint64_t)buf[0] << 56;
+	num |= (uint64_t)buf[1] << 48;
+	num |= (uint64_t)buf[2] << 40;
+	num |= (uint64_t)buf[3] << 32;
+	num |= (uint64_t)buf[4] << 24;
+	num |= (uint64_t)buf[5] << 16;
+	num |= (uint64_t)buf[6] << 8;
+	num |= (uint64_t)buf[7];
+	return num;
+#endif
+}
+
+
+static inline uint64_t
+read64le(const uint8_t *buf)
+{
+#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
+	uint64_t num = read64ne(buf);
+	return conv64le(num);
+#else
+	uint64_t num = (uint64_t)buf[0];
+	num |= (uint64_t)buf[1] << 8;
+	num |= (uint64_t)buf[2] << 16;
+	num |= (uint64_t)buf[3] << 24;
+	num |= (uint64_t)buf[4] << 32;
+	num |= (uint64_t)buf[5] << 40;
+	num |= (uint64_t)buf[6] << 48;
+	num |= (uint64_t)buf[7] << 56;
+	return num;
+#endif
+}
+
+
 // NOTE: Possible byte swapping must be done in a macro to allow the compiler
 // to optimize byte swapping of constants when using glibc's or *BSD's
 // byte swapping macros. The actual write is done in an inline function
@@ -350,11 +390,13 @@ read32le(const uint8_t *buf)
 #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
 #	define write16be(buf, num) write16ne(buf, conv16be(num))
 #	define write32be(buf, num) write32ne(buf, conv32be(num))
+#	define write64be(buf, num) write64ne(buf, conv64be(num))
 #endif
 
 #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
 #	define write16le(buf, num) write16ne(buf, conv16le(num))
 #	define write32le(buf, num) write32ne(buf, conv32le(num))
+#	define write64le(buf, num) write64ne(buf, conv64le(num))
 #endif
 
 
diff --git a/src/liblzma/common/memcmplen.h b/src/liblzma/common/memcmplen.h
index dcfd8d6f..a80428b9 100644
--- a/src/liblzma/common/memcmplen.h
+++ b/src/liblzma/common/memcmplen.h
@@ -51,10 +51,6 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
 		|| (defined(__INTEL_COMPILER) && defined(__x86_64__)) \
 		|| (defined(__INTEL_COMPILER) && defined(_M_X64)) \
 		|| (defined(_MSC_VER) && defined(_M_X64)))
-	// NOTE: This will use 64-bit unaligned access which
-	// TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit, but
-	// it's convenient here at least as long as it's x86-64 only.
-	//
 	// I keep this x86-64 only for now since that's where I know this
 	// to be a good method. This may be fine on other 64-bit CPUs too.
 	// On big endian one should use xor instead of subtraction and switch
@@ -84,8 +80,9 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
 		|| (defined(__INTEL_COMPILER) && defined(__SSE2__)) \
 		|| (defined(_MSC_VER) && defined(_M_IX86_FP) \
 			&& _M_IX86_FP >= 2))
-	// NOTE: Like above, this will use 128-bit unaligned access which
-	// TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit.
+	// NOTE: This will use 128-bit unaligned access which
+	// TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit,
+	// but it's convenient here since this is x86-only.
 	//
 	// SSE2 version for 32-bit and 64-bit x86. On x86-64 the above
 	// version is sometimes significantly faster and sometimes
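
Not part of the patch above: a minimal standalone sketch of what the new
64-bit helpers guarantee. read64be() and read64le() always treat buf[0] as
the most or least significant byte regardless of host endianness; the
byte-by-byte fallback below mirrors the patch's #else branches. The demo_*
names and the test values are illustrative only, not from the xz tree.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

// Assemble the value byte by byte, like the portable fallback in the
// patch's read64be(): correct on any endianness and any alignment.
static uint64_t
demo_read64be(const uint8_t *buf)
{
	uint64_t num = (uint64_t)buf[0] << 56;
	num |= (uint64_t)buf[1] << 48;
	num |= (uint64_t)buf[2] << 40;
	num |= (uint64_t)buf[3] << 32;
	num |= (uint64_t)buf[4] << 24;
	num |= (uint64_t)buf[5] << 16;
	num |= (uint64_t)buf[6] << 8;
	num |= (uint64_t)buf[7];
	return num;
}

// Portable little endian store. The patch's write64le() macro instead
// does a native store plus conv64le(), and is only defined when that is
// known to be safe (little endian host or fast unaligned access).
static void
demo_write64le(uint8_t *buf, uint64_t num)
{
	for (size_t i = 0; i < 8; ++i)
		buf[i] = (uint8_t)(num >> (8 * i));
}

int
main(void)
{
	const uint8_t be[8] = { 0x01, 0x02, 0x03, 0x04,
			0x05, 0x06, 0x07, 0x08 };
	// Prints 102030405060708 on both big and little endian hosts.
	printf("%llx\n", (unsigned long long)demo_read64be(be));

	uint8_t le[8];
	demo_write64le(le, UINT64_C(0x0102030405060708));
	// Least significant byte first: prints "08 01".
	printf("%02x %02x\n", le[0], le[7]);
	return 0;
}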