mirror of https://git.tukaani.org/xz.git
liblzma: Remove CRC_USE_GENERIC_FOR_SMALL_INPUTS
It was already commented out.
This commit is contained in:
parent
f99a7be406
commit
71b147aab7
|
@@ -164,27 +164,6 @@ extern LZMA_API(uint32_t)
|
||||||
lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc)
|
lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc)
|
||||||
{
|
{
|
||||||
#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
|
#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
|
||||||
// On x86-64, if CLMUL is available, it is the best for non-tiny
|
|
||||||
// inputs, being over twice as fast as the generic slice-by-four
|
|
||||||
// version. However, for size <= 16 it's different. In the extreme
|
|
||||||
// case of size == 1 the generic version can be five times faster.
|
|
||||||
// At size >= 8 the CLMUL starts to become reasonable. It
|
|
||||||
// varies depending on the alignment of buf too.
|
|
||||||
//
|
|
||||||
// The above doesn't include the overhead of mythread_once().
|
|
||||||
// At least on x86-64 GNU/Linux, pthread_once() is very fast but
|
|
||||||
// it still makes lzma_crc32(buf, 1, crc) 50-100 % slower. When
|
|
||||||
// size reaches 12-16 bytes the overhead becomes negligible.
|
|
||||||
//
|
|
||||||
// So using the generic version for size <= 16 may give better
|
|
||||||
// performance with tiny inputs but if such inputs happen rarely
|
|
||||||
// it's not so obvious because then the lookup table of the
|
|
||||||
// generic version may not be in the processor cache.
|
|
||||||
#ifdef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
|
||||||
if (size <= 16)
|
|
||||||
return crc32_generic(buf, size, crc);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
|
#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
|
||||||
// See crc32_dispatch(). This would be the alternative which uses
|
// See crc32_dispatch(). This would be the alternative which uses
|
||||||
|
|
|
@@ -134,11 +134,6 @@ extern LZMA_API(uint64_t)
|
||||||
lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
|
lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
|
||||||
{
|
{
|
||||||
#if defined(CRC64_GENERIC) && defined(CRC64_ARCH_OPTIMIZED)
|
#if defined(CRC64_GENERIC) && defined(CRC64_ARCH_OPTIMIZED)
|
||||||
|
|
||||||
#ifdef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
|
||||||
if (size <= 16)
|
|
||||||
return crc64_generic(buf, size, crc);
|
|
||||||
#endif
|
|
||||||
return crc64_func(buf, size, crc);
|
return crc64_func(buf, size, crc);
|
||||||
|
|
||||||
#elif defined(CRC64_ARCH_OPTIMIZED)
|
#elif defined(CRC64_ARCH_OPTIMIZED)
|
||||||
|
|
|
@@ -59,8 +59,6 @@
|
||||||
#undef CRC32_ARM64
|
#undef CRC32_ARM64
|
||||||
#undef CRC64_ARM64_CLMUL
|
#undef CRC64_ARM64_CLMUL
|
||||||
|
|
||||||
#undef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
|
||||||
|
|
||||||
// ARM64 CRC32 instruction is only useful for CRC32. Currently, only
|
// ARM64 CRC32 instruction is only useful for CRC32. Currently, only
|
||||||
// little endian is supported since we were unable to test on a big
|
// little endian is supported since we were unable to test on a big
|
||||||
// endian machine.
|
// endian machine.
|
||||||
|
@@ -99,18 +97,6 @@
|
||||||
# define CRC32_ARCH_OPTIMIZED 1
|
# define CRC32_ARCH_OPTIMIZED 1
|
||||||
# define CRC64_ARCH_OPTIMIZED 1
|
# define CRC64_ARCH_OPTIMIZED 1
|
||||||
# define CRC_X86_CLMUL 1
|
# define CRC_X86_CLMUL 1
|
||||||
|
|
||||||
/*
|
|
||||||
// The generic code is much faster with 1-8-byte inputs and
|
|
||||||
// has similar performance up to 16 bytes at least in
|
|
||||||
// microbenchmarks (it depends on input buffer alignment
|
|
||||||
// too). If both versions are built, this #define will use
|
|
||||||
// the generic version for inputs up to 16 bytes and CLMUL
|
|
||||||
// for bigger inputs. It saves a little in code size since
|
|
||||||
// the special cases for 0-16-byte inputs will be omitted
|
|
||||||
// from the CLMUL code.
|
|
||||||
# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1
|
|
||||||
*/
|
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@@ -130,7 +130,6 @@ crc_simd_body(const uint8_t *buf, const size_t size, __m128i *v0, __m128i *v1,
|
||||||
|
|
||||||
__m128i v2, v3;
|
__m128i v2, v3;
|
||||||
|
|
||||||
#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
|
||||||
if (size <= 16) {
|
if (size <= 16) {
|
||||||
// Right-shift initial_crc by 1-16 bytes based on "size"
|
// Right-shift initial_crc by 1-16 bytes based on "size"
|
||||||
// and store the result in v1 (high bytes) and v0 (low bytes).
|
// and store the result in v1 (high bytes) and v0 (low bytes).
|
||||||
|
@@ -173,9 +172,7 @@ crc_simd_body(const uint8_t *buf, const size_t size, __m128i *v0, __m128i *v1,
|
||||||
|
|
||||||
*v0 = _mm_xor_si128(*v0, v3);
|
*v0 = _mm_xor_si128(*v0, v3);
|
||||||
*v1 = _mm_alignr_epi8(*v1, *v0, 8);
|
*v1 = _mm_alignr_epi8(*v1, *v0, 8);
|
||||||
} else
|
} else {
|
||||||
#endif
|
|
||||||
{
|
|
||||||
// There is more than 16 bytes of input.
|
// There is more than 16 bytes of input.
|
||||||
const __m128i data1 = _mm_load_si128(aligned_buf);
|
const __m128i data1 = _mm_load_si128(aligned_buf);
|
||||||
const __m128i *end = (const __m128i*)(
|
const __m128i *end = (const __m128i*)(
|
||||||
|
@@ -245,11 +242,9 @@ crc_attr_target
|
||||||
static uint32_t
|
static uint32_t
|
||||||
crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
|
crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
|
||||||
{
|
{
|
||||||
#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
|
||||||
// The code assumes that there is at least one byte of input.
|
// The code assumes that there is at least one byte of input.
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return crc;
|
return crc;
|
||||||
#endif
|
|
||||||
|
|
||||||
// uint32_t poly = 0xedb88320;
|
// uint32_t poly = 0xedb88320;
|
||||||
const int64_t p = 0x1db710640; // p << 1
|
const int64_t p = 0x1db710640; // p << 1
|
||||||
|
@@ -334,11 +329,9 @@ crc_attr_target
|
||||||
static uint64_t
|
static uint64_t
|
||||||
crc64_arch_optimized(const uint8_t *buf, size_t size, uint64_t crc)
|
crc64_arch_optimized(const uint8_t *buf, size_t size, uint64_t crc)
|
||||||
{
|
{
|
||||||
#ifndef CRC_USE_GENERIC_FOR_SMALL_INPUTS
|
|
||||||
// The code assumes that there is at least one byte of input.
|
// The code assumes that there is at least one byte of input.
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return crc;
|
return crc;
|
||||||
#endif
|
|
||||||
|
|
||||||
// const uint64_t poly = 0xc96c5795d7870f42; // CRC polynomial
|
// const uint64_t poly = 0xc96c5795d7870f42; // CRC polynomial
|
||||||
const uint64_t p = 0x92d8af2baf0e1e85; // (poly << 1) | 1
|
const uint64_t p = 0x92d8af2baf0e1e85; // (poly << 1) | 1
|
||||||
|
|
Loading…
Reference in New Issue