mirror of https://git.tukaani.org/xz.git
liblzma: ARM64 CRC32: Align the buffer faster
Instead of aligning the buffer one byte at a time, use the 1/2/4-byte CRC32 instructions.
parent 7e99856f66
commit 0ed8936685
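The alignment amount is at most 7, so its three low bits say exactly which of a 1-, 2- and 4-byte step is needed to reach the next 8-byte boundary. Below is a minimal, hypothetical sketch of that head-alignment step using the ACLE intrinsics from <arm_acle.h> (build with the CRC extension enabled, e.g. -march=armv8-a+crc); memcpy loads on a little-endian target stand in for liblzma's aligned_read16le()/aligned_read32le() helpers, and the caller is assumed to have checked size >= 8 as the patch does.

#include <arm_acle.h>   // __crc32b/__crc32h/__crc32w
#include <stdint.h>
#include <string.h>

// Hypothetical helper, not part of liblzma: consume 0-7 bytes so that
// *bufp becomes 8-byte aligned, updating the CRC with at most one byte,
// one 16-bit and one 32-bit CRC32 instruction instead of a byte loop.
// Caller must guarantee *sizep >= 8 (as the patched code does).
static uint32_t
crc32_align_head(const uint8_t **bufp, size_t *sizep, uint32_t crc)
{
	const uint8_t *buf = *bufp;

	// Bytes needed to reach the next 8-byte boundary: 0..7.
	const size_t align = (0 - (uintptr_t)buf) & 7;

	if (align & 1)
		crc = __crc32b(crc, *buf++);

	if (align & 2) {
		uint16_t v;
		memcpy(&v, buf, sizeof(v));     // 2-aligned after the byte step
		crc = __crc32h(crc, v);
		buf += 2;
	}

	if (align & 4) {
		uint32_t v;
		memcpy(&v, buf, sizeof(v));     // 4-aligned after the steps above
		crc = __crc32w(crc, v);
		buf += 4;
	}

	*bufp = buf;
	*sizep -= align;
	return crc;
}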
@@ -8,6 +8,7 @@
 //  Authors:    Chenxi Mao
 //              Jia Tan
 //              Hans Jansen
+//              Lasse Collin
 //
 ///////////////////////////////////////////////////////////////////////////////
@@ -54,25 +55,50 @@ crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
 {
 	crc = ~crc;
 
-	// Align the input buffer because this was shown to be
-	// significantly faster than unaligned accesses.
-	const size_t align_amount = my_min(size, (0U - (uintptr_t)buf) & 7);
-
-	for (const uint8_t *limit = buf + align_amount; buf < limit; ++buf)
-		crc = __crc32b(crc, *buf);
-
-	size -= align_amount;
-
-	// Process 8 bytes at a time. The end point is determined by
-	// ignoring the least significant three bits of size to ensure
-	// we do not process past the bounds of the buffer. This guarantees
-	// that limit is a multiple of 8 and is strictly less than size.
-	for (const uint8_t *limit = buf + (size & ~(size_t)7);
-			buf < limit; buf += 8)
-		crc = __crc32d(crc, aligned_read64le(buf));
-
-	// Process the remaining bytes that are not 8 byte aligned.
-	for (const uint8_t *limit = buf + (size & 7); buf < limit; ++buf)
-		crc = __crc32b(crc, *buf);
+	if (size >= 8) {
+		// Align the input buffer because this was shown to be
+		// significantly faster than unaligned accesses.
+		const size_t align = (0 - (uintptr_t)buf) & 7;
+
+		if (align & 1)
+			crc = __crc32b(crc, *buf++);
+
+		if (align & 2) {
+			crc = __crc32h(crc, aligned_read16le(buf));
+			buf += 2;
+		}
+
+		if (align & 4) {
+			crc = __crc32w(crc, aligned_read32le(buf));
+			buf += 4;
+		}
+
+		size -= align;
+
+		// Process 8 bytes at a time. The end point is determined by
+		// ignoring the least significant three bits of size to
+		// ensure we do not process past the bounds of the buffer.
+		// This guarantees that limit is a multiple of 8 and is
+		// strictly less than size.
+		for (const uint8_t *limit = buf + (size & ~(size_t)7);
+				buf < limit; buf += 8)
+			crc = __crc32d(crc, aligned_read64le(buf));
+
+		size &= 7;
+	}
+
+	// Process the remaining bytes that are not 8 byte aligned.
+	if (size & 4) {
+		crc = __crc32w(crc, aligned_read32le(buf));
+		buf += 4;
+	}
+
+	if (size & 2) {
+		crc = __crc32h(crc, aligned_read16le(buf));
+		buf += 2;
+	}
+
+	if (size & 1)
+		crc = __crc32b(crc, *buf);
 
 	return ~crc;
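The same bit trick drives the tail: after the 8-byte loop, size & 7 bytes remain and are consumed with at most one 4-byte, one 2-byte and one 1-byte step instead of a byte loop. A tiny, hypothetical stand-alone check of that bookkeeping (sizes here are what remains after the alignment step; no CRC instructions involved):

#include <stdio.h>
#include <stddef.h>

// Hypothetical demonstration: partition a length the same way the
// patched function does and print how many bytes each stage consumes.
static void
show_partition(size_t size)
{
	const size_t bulk = size & ~(size_t)7;	// covered by the 8-byte loop
	const size_t tail = size & 7;		// 0..7 bytes for the 4/2/1 steps

	printf("size=%zu: 8-byte loop=%zu, 4-byte=%d, 2-byte=%d, 1-byte=%d\n",
			size, bulk,
			(tail & 4) ? 4 : 0,
			(tail & 2) ? 2 : 0,
			(tail & 1) ? 1 : 0);
}

int
main(void)
{
	show_partition(13);	// 8 + 4 + 1
	show_partition(64);	// handled entirely by the 8-byte loop
	show_partition(7);	// size < 8: the if-block is skipped, tail does 4 + 2 + 1
	return 0;
}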