From 0b8fa310cf56fec55663f62340e49e8e1441594f Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Tue, 10 Jan 2023 22:14:03 +0200 Subject: [PATCH] liblzma: CLMUL CRC64: Work around a bug in MSVC, second attempt. This affects only 32-bit x86 builds. x86-64 is OK as is. I still cannot easily test this myself. The reporter has tested this and it passes the tests included in the CMake build and performance is good: raw CRC64 is 2-3 times faster than the C version of the slice-by-four method. (Note that liblzma doesn't include an MSVC-compatible version of the 32-bit x86 assembly code for the slice-by-four method.) Thanks to Iouri Kharon for figuring out a fix, testing, and benchmarking. --- src/liblzma/check/crc64_fast.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/liblzma/check/crc64_fast.c b/src/liblzma/check/crc64_fast.c index e3cbf1b1..db44633b 100644 --- a/src/liblzma/check/crc64_fast.c +++ b/src/liblzma/check/crc64_fast.c @@ -184,6 +184,20 @@ calc_hi(uint64_t poly, uint64_t a) MASK_H(in, mask, high) +// MSVC (VS2015 - VS2022) produces bad 32-bit x86 code from the CLMUL CRC +// code when optimizations are enabled (release build). According to the bug +// report, the ebx register is corrupted and the calculated result is wrong. +// Trying to work around the problem with "__asm mov ebx, ebx" didn't help. +// The following pragma works and performance is still good. x86-64 builds +// aren't affected by this problem. +// +// NOTE: Another pragma after the function restores the optimizations. +// If the #if condition here is updated, the other one must be updated too. +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \ + && defined(_M_IX86) +# pragma optimize("g", off) +#endif + // EDG-based compilers (Intel's classic compiler and compiler for E2K) can // define __GNUC__ but the attribute must not be used with them. // The new Clang-based ICX needs the attribute. 
@@ -371,6 +385,10 @@ crc64_clmul(const uint8_t *buf, size_t size, uint64_t crc) # pragma GCC diagnostic pop #endif } +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \ + && defined(_M_IX86) +# pragma optimize("", on) +#endif #endif