2007-12-08 22:42:33 +00:00
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
/// \file crc64.c
|
|
|
|
/// \brief CRC64 calculation
|
2009-04-13 08:27:40 +00:00
|
|
|
///
|
2022-11-14 19:34:57 +00:00
|
|
|
/// There are two methods in this file. crc64_generic uses
/// the slice-by-four algorithm. This is the same idea that is
/// used in crc32_fast.c, but for CRC64 we use only four tables
/// instead of eight to avoid increasing CPU cache usage.
|
2022-11-14 19:34:57 +00:00
|
|
|
///
|
|
|
|
/// crc64_clmul uses 32/64-bit x86 SSSE3, SSE4.1, and CLMUL instructions.
|
|
|
|
/// It was derived from
|
2023-10-12 17:23:40 +00:00
|
|
|
/// https://www.researchgate.net/publication/263424619_Fast_CRC_computation
|
2022-11-14 19:34:57 +00:00
|
|
|
/// and the public domain code from https://github.com/rawrunprotected/crc
|
2023-10-12 17:23:40 +00:00
|
|
|
/// (URLs were checked on 2023-09-29).
|
2022-11-14 19:34:57 +00:00
|
|
|
///
|
|
|
|
/// FIXME: Builds for 32-bit x86 use crc64_x86.S by default instead
|
|
|
|
/// of this file and thus CLMUL version isn't available on 32-bit x86
|
|
|
|
/// unless configured with --disable-assembler. Even then the lookup table
|
|
|
|
/// isn't omitted in crc64_table.c since it doesn't know that assembly
|
|
|
|
/// code has been disabled.
|
2007-12-08 22:42:33 +00:00
|
|
|
//
|
2022-11-14 19:34:57 +00:00
|
|
|
// Authors: Lasse Collin
|
|
|
|
// Ilya Kurdyukov
|
2007-12-08 22:42:33 +00:00
|
|
|
//
|
2009-04-13 08:27:40 +00:00
|
|
|
// This file has been put into the public domain.
|
|
|
|
// You can do whatever you want with this file.
|
2007-12-08 22:42:33 +00:00
|
|
|
//
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
#include "check.h"
|
2023-10-12 17:23:40 +00:00
|
|
|
#include "crc_common.h"
|
2022-11-14 19:34:57 +00:00
|
|
|
|
2023-10-14 04:17:57 +00:00
|
|
|
#ifdef CRC_GENERIC
|
|
|
|
|
2022-11-14 19:34:57 +00:00
|
|
|
/////////////////////////////////
|
|
|
|
// Generic slice-by-four CRC64 //
|
|
|
|
/////////////////////////////////
|
|
|
|
|
2007-12-08 22:42:33 +00:00
|
|
|
// A1(x) is used by the byte-at-a-time loops in crc64_generic() to pick
// the byte of the 64-bit CRC state that is combined with the next input
// byte.
//
// On big endian builds crc64_generic() keeps the state byte-swapped, so
// that byte is the most significant one (bits 56-63). Otherwise A1 is
// just an alias for A, which is defined outside this file section
// (presumably in crc_common.h -- confirm).
#ifdef WORDS_BIGENDIAN
# define A1(x) ((x) >> 56)
#else
# define A1 A
#endif
|
|
|
|
|
|
|
|
|
2009-04-13 08:27:40 +00:00
|
|
|
// See the comments in crc32_fast.c. They aren't duplicated here.
|
2022-11-14 19:34:57 +00:00
|
|
|
static uint64_t
|
|
|
|
crc64_generic(const uint8_t *buf, size_t size, uint64_t crc)
|
2007-12-08 22:42:33 +00:00
|
|
|
{
|
|
|
|
crc = ~crc;
|
|
|
|
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
2009-10-04 19:57:12 +00:00
|
|
|
crc = bswap64(crc);
|
2007-12-08 22:42:33 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
if (size > 4) {
|
|
|
|
while ((uintptr_t)(buf) & 3) {
|
|
|
|
crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc);
|
|
|
|
--size;
|
|
|
|
}
|
|
|
|
|
|
|
|
const uint8_t *const limit = buf + (size & ~(size_t)(3));
|
|
|
|
size &= (size_t)(3);
|
|
|
|
|
|
|
|
while (buf < limit) {
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
2022-10-31 09:54:44 +00:00
|
|
|
const uint32_t tmp = (uint32_t)(crc >> 32)
|
2019-12-30 22:29:48 +00:00
|
|
|
^ aligned_read32ne(buf);
|
2007-12-08 22:42:33 +00:00
|
|
|
#else
|
2022-10-31 09:54:44 +00:00
|
|
|
const uint32_t tmp = (uint32_t)crc
|
|
|
|
^ aligned_read32ne(buf);
|
2007-12-08 22:42:33 +00:00
|
|
|
#endif
|
|
|
|
buf += 4;
|
|
|
|
|
|
|
|
crc = lzma_crc64_table[3][A(tmp)]
|
|
|
|
^ lzma_crc64_table[2][B(tmp)]
|
|
|
|
^ S32(crc)
|
|
|
|
^ lzma_crc64_table[1][C(tmp)]
|
|
|
|
^ lzma_crc64_table[0][D(tmp)];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (size-- != 0)
|
|
|
|
crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc);
|
|
|
|
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
2009-10-04 19:57:12 +00:00
|
|
|
crc = bswap64(crc);
|
2007-12-08 22:42:33 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
return ~crc;
|
|
|
|
}
|
2022-11-14 19:34:57 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(CRC_GENERIC) && defined(CRC_CLMUL)
|
2023-06-27 14:05:23 +00:00
|
|
|
// Pointer to a CRC64 implementation. This is the return type of
// crc64_resolve() and the type of the crc64_func dispatch pointer; the
// parameters match those of lzma_crc64().
typedef uint64_t (*crc64_func_type)(
		const uint8_t *buf, size_t size, uint64_t crc);
|
|
|
|
|
2023-07-19 15:36:00 +00:00
|
|
|
// Clang 16.0.0 and older has a bug where it marks the ifunc resolver
|
|
|
|
// function as unused since it is static and never used outside of
|
|
|
|
// __attribute__((__ifunc__())).
|
|
|
|
#if defined(HAVE_FUNC_ATTRIBUTE_IFUNC) && defined(__clang__)
|
|
|
|
# pragma GCC diagnostic push
|
|
|
|
# pragma GCC diagnostic ignored "-Wunused-function"
|
|
|
|
#endif
|
2023-06-27 14:05:23 +00:00
|
|
|
|
|
|
|
static crc64_func_type
|
|
|
|
crc64_resolve(void)
|
|
|
|
{
|
2023-10-14 04:17:57 +00:00
|
|
|
return lzma_is_clmul_supported() ? &lzma_crc64_clmul : &crc64_generic;
|
2023-06-27 14:05:23 +00:00
|
|
|
}
|
|
|
|
|
2023-07-19 15:36:00 +00:00
|
|
|
#if defined(HAVE_FUNC_ATTRIBUTE_IFUNC) && defined(__clang__)
|
|
|
|
# pragma GCC diagnostic pop
|
|
|
|
#endif
|
2023-06-27 14:05:23 +00:00
|
|
|
|
|
|
|
#ifndef HAVE_FUNC_ATTRIBUTE_IFUNC
|
|
|
|
|
2022-11-14 19:34:57 +00:00
|
|
|
#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
|
|
|
|
# define CRC64_SET_FUNC_ATTR __attribute__((__constructor__))
|
2023-06-27 14:05:23 +00:00
|
|
|
static crc64_func_type crc64_func;
|
2022-11-14 19:34:57 +00:00
|
|
|
#else
|
|
|
|
# define CRC64_SET_FUNC_ATTR
|
|
|
|
static uint64_t crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc);
|
2023-06-27 14:05:23 +00:00
|
|
|
static crc64_func_type crc64_func = &crc64_dispatch;
|
2022-11-14 19:34:57 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
CRC64_SET_FUNC_ATTR
|
|
|
|
static void
|
|
|
|
crc64_set_func(void)
|
|
|
|
{
|
2023-06-27 14:05:23 +00:00
|
|
|
crc64_func = crc64_resolve();
|
2022-11-14 19:34:57 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
// Initial target of crc64_func when constructors aren't available.
// It resolves the real implementation, stores it in crc64_func, and then
// forwards this call to it. Later calls through crc64_func go directly
// to the resolved implementation.
static uint64_t
crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc)
{
	// When __attribute__((__ifunc__(...))) and
	// __attribute__((__constructor__)) aren't supported, set the
	// function pointer without any locking. If multiple threads run
	// the detection code in parallel, they will all end up setting
	// the pointer to the same value. This avoids the use of
	// mythread_once() on every call to lzma_crc64() but this likely
	// isn't strictly standards compliant. Let's change it if it breaks.
	crc64_set_func();
	return crc64_func(buf, size, crc);
}
#endif
|
|
|
|
#endif
|
2023-06-27 14:05:23 +00:00
|
|
|
#endif
|
2022-11-14 19:34:57 +00:00
|
|
|
|
|
|
|
|
2023-10-14 02:23:03 +00:00
|
|
|
#ifdef CRC_USE_IFUNC
|
2023-06-27 14:05:23 +00:00
|
|
|
extern LZMA_API(uint64_t)
|
|
|
|
lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
|
|
|
|
__attribute__((__ifunc__("crc64_resolve")));
|
|
|
|
#else
|
2022-11-14 19:34:57 +00:00
|
|
|
extern LZMA_API(uint64_t)
|
|
|
|
lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
|
|
|
|
{
|
|
|
|
#if defined(CRC_GENERIC) && defined(CRC_CLMUL)
|
|
|
|
// If CLMUL is available, it is the best for non-tiny inputs,
|
|
|
|
// being over twice as fast as the generic slice-by-four version.
|
|
|
|
// However, for size <= 16 it's different. In the extreme case
|
|
|
|
// of size == 1 the generic version can be five times faster.
|
|
|
|
// At size >= 8 the CLMUL starts to become reasonable. It
|
|
|
|
// varies depending on the alignment of buf too.
|
|
|
|
//
|
|
|
|
// The above doesn't include the overhead of mythread_once().
|
|
|
|
// At least on x86-64 GNU/Linux, pthread_once() is very fast but
|
|
|
|
// it still makes lzma_crc64(buf, 1, crc) 50-100 % slower. When
|
|
|
|
// size reaches 12-16 bytes the overhead becomes negligible.
|
|
|
|
//
|
|
|
|
// So using the generic version for size <= 16 may give better
|
|
|
|
// performance with tiny inputs but if such inputs happen rarely
|
|
|
|
// it's not so obvious because then the lookup table of the
|
|
|
|
// generic version may not be in the processor cache.
|
|
|
|
#ifdef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
|
|
|
if (size <= 16)
|
|
|
|
return crc64_generic(buf, size, crc);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
|
|
|
|
// See crc64_dispatch(). This would be the alternative which uses
|
|
|
|
// locking and doesn't use crc64_dispatch(). Note that on Windows
|
|
|
|
// this method needs Vista threads.
|
|
|
|
mythread_once(crc64_set_func);
|
|
|
|
#endif
|
|
|
|
*/
|
|
|
|
|
|
|
|
return crc64_func(buf, size, crc);
|
|
|
|
|
|
|
|
#elif defined(CRC_CLMUL)
|
|
|
|
// If CLMUL is used unconditionally without runtime CPU detection
|
|
|
|
// then omitting the generic version and its 8 KiB lookup table
|
|
|
|
// makes the library smaller.
|
|
|
|
//
|
|
|
|
// FIXME: Lookup table isn't currently omitted on 32-bit x86,
|
|
|
|
// see crc64_table.c.
|
2023-10-14 04:17:57 +00:00
|
|
|
return lzma_crc64_clmul(buf, size, crc);
|
2022-11-14 19:34:57 +00:00
|
|
|
|
|
|
|
#else
|
|
|
|
return crc64_generic(buf, size, crc);
|
|
|
|
#endif
|
|
|
|
}
|
2023-06-27 14:05:23 +00:00
|
|
|
#endif
|