1
0
mirror of https://git.tukaani.org/xz.git synced 2025-12-18 19:38:43 +00:00
xz/src/liblzma/check/crc64_fast.c
Lasse Collin 85b081f5d4 liblzma: Make 32-bit x86 CRC assembly co-exist with CLMUL
Now runtime detection of CLMUL support can pick between the CLMUL and
the generic assembly implementations. Whatever overhead this has for
builds that omit CLMUL completely isn't important because builds for
any non-ancient system are likely to include the CLMUL code too.

Handle the CRC tables in crcXX_fast.c files because now these files
are built even when assembly code is used.

If 32-bit x86 assembly is enabled then it will always be built even
if compiler flags were such that CLMUL would be allowed unconditionally.
That is, runtime detection will be used anyway. This keeps the build
rules simpler.

In LZ encoder, build and use lzma_lz_hash_table[256] if CLMUL CRC
is used without runtime detection. Previously this wasn't needed
because crc32_table.c included the lzma_crc32_table[][] in the build
unless encoder support had been disabled. Including an 8 KiB table
was silly when only 1 KiB is actually used. So now liblzma is 7 KiB
smaller if CLMUL is enabled without runtime detection.
2024-06-23 14:36:44 +03:00

170 lines
3.8 KiB
C

// SPDX-License-Identifier: 0BSD
///////////////////////////////////////////////////////////////////////////////
//
/// \file crc64_fast.c
/// \brief CRC64 calculation
//
// Authors: Lasse Collin
// Ilya Kurdyukov
//
///////////////////////////////////////////////////////////////////////////////
#include "check.h"
#include "crc_common.h"
#if defined(CRC_X86_CLMUL)
# define BUILDING_CRC_CLMUL 64
# include "crc_x86_clmul.h"
#endif
#ifdef CRC64_GENERIC
/////////////////////////////////
// Generic slice-by-four CRC64 //
/////////////////////////////////
#if defined(WORDS_BIGENDIAN)
# include "crc64_table_be.h"
#else
# include "crc64_table_le.h"
#endif
#ifdef HAVE_CRC_X86_ASM
// With HAVE_CRC_X86_ASM defined, the C implementation in the #else branch
// is compiled out and lzma_crc64_generic() is only declared here, so its
// definition must come from another object file (presumably the 32-bit
// x86 assembly implementation -- confirm against the build rules).
extern uint64_t lzma_crc64_generic(
const uint8_t *buf, size_t size, uint64_t crc);
#else
// A1(x) extracts the part of the CRC state that is XORed with the next
// input byte to index lzma_crc64_table[0] in the byte-at-a-time loops of
// lzma_crc64_generic().
#ifdef WORDS_BIGENDIAN
// Big endian: lzma_crc64_generic() keeps the state byte-swapped, so the
// relevant byte is the most significant one.
# define A1(x) ((x) >> 56)
#else
// Otherwise A1 is simply an alias for A, which is not defined in this
// file (presumably it comes from crc_common.h -- confirm).
# define A1 A
#endif
// Table-driven CRC64 written in portable C. The algorithm itself is
// explained in the comments of crc32_fast.c; they aren't duplicated here.
//
// buf and size describe the input bytes and crc is the CRC value to
// continue from. The value is complemented on entry and again on return.
static uint64_t
lzma_crc64_generic(const uint8_t *buf, size_t size, uint64_t crc)
{
	uint64_t state = ~crc;

#ifdef WORDS_BIGENDIAN
	state = byteswap64(state);
#endif

	if (size > 4) {
		// Process single bytes until buf is aligned to four bytes.
		for (; ((uintptr_t)(buf) & 3) != 0; --size) {
			const uint8_t byte = *buf++;
			state = lzma_crc64_table[0][byte ^ A1(state)]
					^ S8(state);
		}

		// Split what is left into whole 32-bit words and
		// a tail of 0-3 bytes.
		const size_t tail = size & (size_t)(3);
		const uint8_t *const words_end = buf + (size - tail);
		size = tail;

		while (buf < words_end) {
			// Pick the 32 bits of the state that get mixed
			// with the next input word.
#ifdef WORDS_BIGENDIAN
			const uint32_t state_half = (uint32_t)(state >> 32);
#else
			const uint32_t state_half = (uint32_t)state;
#endif
			const uint32_t word
					= state_half ^ aligned_read32ne(buf);
			buf += 4;

			state = S32(state)
					^ lzma_crc64_table[3][A(word)]
					^ lzma_crc64_table[2][B(word)]
					^ lzma_crc64_table[1][C(word)]
					^ lzma_crc64_table[0][D(word)];
		}
	}

	// The tail bytes, or the whole input when size <= 4.
	for (; size != 0; --size) {
		const uint8_t byte = *buf++;
		state = lzma_crc64_table[0][byte ^ A1(state)] ^ S8(state);
	}

#ifdef WORDS_BIGENDIAN
	state = byteswap64(state);
#endif

	return ~state;
}
#endif // HAVE_CRC_X86_ASM
#endif // CRC64_GENERIC
#if defined(CRC64_GENERIC) && defined(CRC64_ARCH_OPTIMIZED)
//////////////////////////
// Function dispatching //
//////////////////////////
// If both the generic and arch-optimized implementations are usable, then
// the function that is used is selected at runtime. See crc32_fast.c.
// Pointer type matching the signature of the CRC64 implementations
// that can be selected at runtime.
typedef uint64_t (*crc64_func_type)(
		const uint8_t *buf, size_t size, uint64_t crc);

// Returns the implementation to use: the arch-optimized one when
// is_arch_extension_supported() reports true, the generic one otherwise.
static crc64_func_type
crc64_resolve(void)
{
	if (is_arch_extension_supported())
		return &crc64_arch_optimized;

	return &lzma_crc64_generic;
}
// crc64_func is the pointer that lzma_crc64() calls through when both the
// generic and the arch-optimized implementations are built.
#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
// crc64_set_func() gets the constructor attribute, so crc64_func has no
// explicit initializer here and is assigned by crc64_set_func().
# define CRC64_SET_FUNC_ATTR __attribute__((__constructor__))
static crc64_func_type crc64_func;
#else
// No constructor attribute available: crc64_func initially points to
// crc64_dispatch(), which calls crc64_set_func() and then forwards the
// call through the updated pointer.
# define CRC64_SET_FUNC_ATTR
static uint64_t crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc);
static crc64_func_type crc64_func = &crc64_dispatch;
#endif
CRC64_SET_FUNC_ATTR
static void
crc64_set_func(void)
{
crc64_func = crc64_resolve();
return;
}
#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
static uint64_t
crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc)
{
crc64_set_func();
return crc64_func(buf, size, crc);
}
#endif
#endif
// Public CRC64 entry point. Which implementation handles the call depends
// on the build configuration:
//   - no arch-optimized code: lzma_crc64_generic()
//   - both generic and arch-optimized: the function stored in crc64_func
//   - arch-optimized only: crc64_arch_optimized()
extern LZMA_API(uint64_t)
lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
{
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \
		&& defined(_M_IX86) && defined(CRC64_ARCH_OPTIMIZED)
	// With the CLMUL code enabled, VS2015-2022 might corrupt the ebx
	// register on 32-bit x86. This no-op instruction is a hack that
	// makes MSVC store and restore ebx. It is needed only here and
	// not in lzma_crc32().
	__asm mov ebx, ebx
#endif

#if !defined(CRC64_ARCH_OPTIMIZED)
	return lzma_crc64_generic(buf, size, crc);

#elif defined(CRC64_GENERIC)
	return crc64_func(buf, size, crc);

#else
	// The arch-optimized version is used unconditionally, without
	// runtime CPU detection. Leaving out the generic version and
	// its 8 KiB lookup table makes the library smaller.
	return crc64_arch_optimized(buf, size, crc);
#endif
}