mirror of https://git.tukaani.org/xz.git
liblzma: CRC32 CLMUL: Refactor the constants and simplify
By using modulus-scaled constants, the final reduction can be simplified.
This commit is contained in:
parent
ef652ac391
commit
d8fb098617
|
@ -211,31 +211,6 @@ crc_simd_body(const uint8_t *buf, const size_t size, __m128i *v0, __m128i *v1,
|
||||||
// x86 CLMUL CRC32 //
|
// x86 CLMUL CRC32 //
|
||||||
/////////////////////
|
/////////////////////
|
||||||
|
|
||||||
/*
|
|
||||||
// These functions were used to generate the constants
|
|
||||||
// at the top of crc32_arch_optimized().
|
|
||||||
static uint64_t
|
|
||||||
calc_lo(uint64_t p, uint64_t a, int n)
|
|
||||||
{
|
|
||||||
uint64_t b = 0; int i;
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
b = b >> 1 | (a & 1) << (n - 1);
|
|
||||||
a = (a >> 1) ^ ((0 - (a & 1)) & p);
|
|
||||||
}
|
|
||||||
return b;
|
|
||||||
}
|
|
||||||
|
|
||||||
// same as ~crc(&a, sizeof(a), ~0)
|
|
||||||
static uint64_t
|
|
||||||
calc_hi(uint64_t p, uint64_t a, int n)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < n; i++)
|
|
||||||
a = (a >> 1) ^ ((0 - (a & 1)) & p);
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef BUILDING_CRC32_CLMUL
|
#ifdef BUILDING_CRC32_CLMUL
|
||||||
|
|
||||||
crc_attr_target
|
crc_attr_target
|
||||||
|
@ -246,31 +221,22 @@ crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return crc;
|
return crc;
|
||||||
|
|
||||||
// uint32_t poly = 0xedb88320;
|
// See crc_clmul_consts_gen.c.
|
||||||
const int64_t p = 0x1db710640; // p << 1
|
const __m128i vfold16 = _mm_set_epi64x(0xccaa009e, 0xae689191);
|
||||||
const int64_t mu = 0x1f7011641; // calc_lo(p, p, 32) << 1 | 1
|
const __m128i mu_p = _mm_set_epi64x(
|
||||||
const int64_t k5 = 0x163cd6124; // calc_hi(p, p, 32) << 1
|
(int64_t)0xb4e5b025f7011641, 0x1db710640);
|
||||||
const int64_t k4 = 0x0ccaa009e; // calc_hi(p, p, 64) << 1
|
|
||||||
const int64_t k3 = 0x1751997d0; // calc_hi(p, p, 128) << 1
|
|
||||||
|
|
||||||
const __m128i vfold4 = _mm_set_epi64x(mu, p);
|
__m128i v0, v1;
|
||||||
const __m128i vfold8 = _mm_set_epi64x(0, k5);
|
|
||||||
const __m128i vfold16 = _mm_set_epi64x(k4, k3);
|
|
||||||
|
|
||||||
__m128i v0, v1, v2;
|
|
||||||
|
|
||||||
crc_simd_body(buf, size, &v0, &v1, vfold16,
|
crc_simd_body(buf, size, &v0, &v1, vfold16,
|
||||||
_mm_cvtsi32_si128((int32_t)~crc));
|
_mm_cvtsi32_si128((int32_t)~crc));
|
||||||
|
|
||||||
v1 = _mm_xor_si128(
|
v1 = _mm_xor_si128(
|
||||||
_mm_clmulepi64_si128(v0, vfold16, 0x10), v1); // xxx0
|
_mm_clmulepi64_si128(v0, vfold16, 0x10), v1); // xxx0
|
||||||
v2 = _mm_shuffle_epi32(v1, 0xe7); // 0xx0
|
|
||||||
v0 = _mm_slli_epi64(v1, 32); // [0]
|
v0 = _mm_clmulepi64_si128(v1, mu_p, 0x10); // v1 * mu
|
||||||
v0 = _mm_clmulepi64_si128(v0, vfold8, 0x00);
|
v0 = _mm_clmulepi64_si128(v0, mu_p, 0x00); // v0 * p
|
||||||
v0 = _mm_xor_si128(v0, v2); // [1] [2]
|
v0 = _mm_xor_si128(v0, v1);
|
||||||
v2 = _mm_clmulepi64_si128(v0, vfold4, 0x10);
|
|
||||||
v2 = _mm_clmulepi64_si128(v2, vfold4, 0x00);
|
|
||||||
v0 = _mm_xor_si128(v0, v2); // [2]
|
|
||||||
return ~(uint32_t)_mm_extract_epi32(v0, 2);
|
return ~(uint32_t)_mm_extract_epi32(v0, 2);
|
||||||
}
|
}
|
||||||
#endif // BUILDING_CRC32_CLMUL
|
#endif // BUILDING_CRC32_CLMUL
|
||||||
|
|
Loading…
Reference in New Issue