mirror of https://git.tukaani.org/xz.git
liblzma: CRC64 CLMUL: Refactor the constants
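The code now refers to crc_clmul_consts_gen.c instead of keeping the constant-generation functions in a comment. vfold8 was renamed to mu_p, with its two 64-bit halves swapped (mu is now the high half and p the low half) and the CLMUL selector immediates adjusted to match. p no longer has its lowest bit set; this makes no difference because the output bits it affects are ignored.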
This commit is contained in:
parent
9f5fc17e32
commit
ef652ac391
|
@ -280,33 +280,6 @@ crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
|
||||||
// x86 CLMUL CRC64 //
|
// x86 CLMUL CRC64 //
|
||||||
/////////////////////
|
/////////////////////
|
||||||
|
|
||||||
/*
|
|
||||||
// These functions were used to generate the constants
|
|
||||||
// at the top of crc64_arch_optimized().
|
|
||||||
static uint64_t
|
|
||||||
calc_lo(uint64_t poly)
|
|
||||||
{
|
|
||||||
uint64_t a = poly;
|
|
||||||
uint64_t b = 0;
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 64; ++i) {
|
|
||||||
b = (b >> 1) | (a << 63);
|
|
||||||
a = (a >> 1) ^ (a & 1 ? poly : 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return b;
|
|
||||||
}
|
|
||||||
|
|
||||||
static uint64_t
|
|
||||||
calc_hi(uint64_t poly, uint64_t a)
|
|
||||||
{
|
|
||||||
for (unsigned i = 0; i < 64; ++i)
|
|
||||||
a = (a >> 1) ^ (a & 1 ? poly : 0);
|
|
||||||
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef BUILDING_CRC64_CLMUL
|
#ifdef BUILDING_CRC64_CLMUL
|
||||||
|
|
||||||
// MSVC (VS2015 - VS2022) produces bad 32-bit x86 code from the CLMUL CRC
|
// MSVC (VS2015 - VS2022) produces bad 32-bit x86 code from the CLMUL CRC
|
||||||
|
@ -333,14 +306,12 @@ crc64_arch_optimized(const uint8_t *buf, size_t size, uint64_t crc)
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return crc;
|
return crc;
|
||||||
|
|
||||||
// const uint64_t poly = 0xc96c5795d7870f42; // CRC polynomial
|
// See crc_clmul_consts_gen.c.
|
||||||
const uint64_t p = 0x92d8af2baf0e1e85; // (poly << 1) | 1
|
const __m128i vfold16 = _mm_set_epi64x(
|
||||||
const uint64_t mu = 0x9c3e466c172963d5; // (calc_lo(poly) << 1) | 1
|
(int64_t)0xdabe95afc7875f40, (int64_t)0xe05dd497ca393ae4);
|
||||||
const uint64_t k2 = 0xdabe95afc7875f40; // calc_hi(poly, 1)
|
|
||||||
const uint64_t k1 = 0xe05dd497ca393ae4; // calc_hi(poly, k2)
|
|
||||||
|
|
||||||
const __m128i vfold8 = _mm_set_epi64x((int64_t)p, (int64_t)mu);
|
const __m128i mu_p = _mm_set_epi64x(
|
||||||
const __m128i vfold16 = _mm_set_epi64x((int64_t)k2, (int64_t)k1);
|
(int64_t)0x9c3e466c172963d5, (int64_t)0x92d8af2baf0e1e84);
|
||||||
|
|
||||||
__m128i v0, v1, v2;
|
__m128i v0, v1, v2;
|
||||||
|
|
||||||
|
@ -355,8 +326,8 @@ crc64_arch_optimized(const uint8_t *buf, size_t size, uint64_t crc)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
v1 = _mm_xor_si128(_mm_clmulepi64_si128(v0, vfold16, 0x10), v1);
|
v1 = _mm_xor_si128(_mm_clmulepi64_si128(v0, vfold16, 0x10), v1);
|
||||||
v0 = _mm_clmulepi64_si128(v1, vfold8, 0x00);
|
v0 = _mm_clmulepi64_si128(v1, mu_p, 0x10);
|
||||||
v2 = _mm_clmulepi64_si128(v0, vfold8, 0x10);
|
v2 = _mm_clmulepi64_si128(v0, mu_p, 0x00);
|
||||||
v0 = _mm_xor_si128(_mm_xor_si128(v1, _mm_slli_si128(v0, 8)), v2);
|
v0 = _mm_xor_si128(_mm_xor_si128(v1, _mm_slli_si128(v0, 8)), v2);
|
||||||
|
|
||||||
#if defined(__i386__) || defined(_M_IX86)
|
#if defined(__i386__) || defined(_M_IX86)
|
||||||
|
|
Loading…
Reference in New Issue