mirror of https://git.tukaani.org/xz.git
liblzma: Speed up CRC32 calculation on 64-bit LoongArch
The crc.w.{b/h/w/d}.w instructions in LoongArch can calculate the CRC32 result for 1/2/4/8 bytes in a single operation. Using these is much faster compared to the generic method.
Optimized CRC32 is enabled unconditionally on 64-bit LoongArch because the LoongArch specification says that CRC32 instructions shall be implemented for 64-bit processors. Optimized CRC32 isn't enabled for 32-bit LoongArch processors because not enough information is available about them.
Co-authored-by: Lasse Collin <lasse.collin@tukaani.org>
Closes: https://github.com/tukaani-project/xz/pull/86
This commit is contained in:
parent
0ed8936685
commit
7baf6835cf
|
@ -548,6 +548,7 @@ add_library(liblzma
|
||||||
src/liblzma/check/crc_common.h
|
src/liblzma/check/crc_common.h
|
||||||
src/liblzma/check/crc_x86_clmul.h
|
src/liblzma/check/crc_x86_clmul.h
|
||||||
src/liblzma/check/crc32_arm64.h
|
src/liblzma/check/crc32_arm64.h
|
||||||
|
src/liblzma/check/crc32_loongarch.h
|
||||||
src/liblzma/common/block_util.c
|
src/liblzma/common/block_util.c
|
||||||
src/liblzma/common/common.c
|
src/liblzma/common/common.c
|
||||||
src/liblzma/common/common.h
|
src/liblzma/common/common.h
|
||||||
|
@ -1341,6 +1342,30 @@ if(XZ_ARM64_CRC32)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
option(XZ_LOONGARCH_CRC32
|
||||||
|
"Use LoongArch CRC32 instructions if supported by the compiler" ON)
|
||||||
|
|
||||||
|
if(XZ_LOONGARCH_CRC32)
|
||||||
|
# LoongArch CRC32 intrinsics are in larchintrin.h.
|
||||||
|
# These are supported by at least GCC and Clang.
|
||||||
|
#
|
||||||
|
# Only 64-bit LoongArch is currently supported.
|
||||||
|
# It doesn't need runtime detection.
|
||||||
|
check_c_source_compiles("
|
||||||
|
#if !(defined(__loongarch__) && __loongarch_grlen >= 64)
|
||||||
|
# error
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <larchintrin.h>
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
return __crc_w_w_w(1, 2);
|
||||||
|
}
|
||||||
|
"
|
||||||
|
HAVE_LOONGARCH_CRC32)
|
||||||
|
tuklib_add_definition_if(liblzma HAVE_LOONGARCH_CRC32)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
|
||||||
# Symbol visibility support:
|
# Symbol visibility support:
|
||||||
#
|
#
|
||||||
|
|
40
configure.ac
40
configure.ac
|
@ -394,6 +394,16 @@ AC_ARG_ENABLE([arm64-crc32], AS_HELP_STRING([--disable-arm64-crc32],
|
||||||
[], [enable_arm64_crc32=yes])
|
[], [enable_arm64_crc32=yes])
|
||||||
|
|
||||||
|
|
||||||
|
################################
|
||||||
|
# LoongArch CRC32 instructions #
|
||||||
|
################################
|
||||||
|
|
||||||
|
AC_ARG_ENABLE([loongarch-crc32], AS_HELP_STRING([--disable-loongarch-crc32],
|
||||||
|
[Do not use LoongArch CRC32 instructions even if support for
|
||||||
|
them is detected.]),
|
||||||
|
[], [enable_loongarch_crc32=yes])
|
||||||
|
|
||||||
|
|
||||||
#####################
|
#####################
|
||||||
# Size optimization #
|
# Size optimization #
|
||||||
#####################
|
#####################
|
||||||
|
@ -1106,6 +1116,36 @@ AS_IF([test "x$enable_arm64_crc32" = xyes], [
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
|
# LoongArch CRC32 intrinsics are in larchintrin.h.
|
||||||
|
# These are supported by at least GCC and Clang.
|
||||||
|
#
|
||||||
|
# Only 64-bit LoongArch is currently supported.
|
||||||
|
# It doesn't need runtime detection.
|
||||||
|
AC_MSG_CHECKING([if LoongArch CRC32 instructions are usable])
|
||||||
|
AS_IF([test "x$enable_loongarch_crc32" = xno], [
|
||||||
|
AC_MSG_RESULT([no, --disable-loongarch-crc32 was used])
|
||||||
|
], [
|
||||||
|
AC_LINK_IFELSE([AC_LANG_SOURCE([[
|
||||||
|
#if !(defined(__loongarch__) && __loongarch_grlen >= 64)
|
||||||
|
# error
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <larchintrin.h>
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
return __crc_w_w_w(1, 2);
|
||||||
|
}
|
||||||
|
]])], [
|
||||||
|
AC_DEFINE([HAVE_LOONGARCH_CRC32], [1], [Define to 1 if
|
||||||
|
64-bit LoongArch CRC32 instructions are supported.])
|
||||||
|
enable_loongarch_crc32=yes
|
||||||
|
], [
|
||||||
|
enable_loongarch_crc32=no
|
||||||
|
])
|
||||||
|
AC_MSG_RESULT([$enable_loongarch_crc32])
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
# Check for sandbox support. If one is found, set enable_sandbox=found.
|
# Check for sandbox support. If one is found, set enable_sandbox=found.
|
||||||
#
|
#
|
||||||
# About -fsanitize: Of our three sandbox methods, only Landlock is
|
# About -fsanitize: Of our three sandbox methods, only Landlock is
|
||||||
|
|
|
@ -14,7 +14,8 @@ liblzma_la_SOURCES += \
|
||||||
check/check.h \
|
check/check.h \
|
||||||
check/crc_common.h \
|
check/crc_common.h \
|
||||||
check/crc_x86_clmul.h \
|
check/crc_x86_clmul.h \
|
||||||
check/crc32_arm64.h
|
check/crc32_arm64.h \
|
||||||
|
check/crc32_loongarch.h
|
||||||
|
|
||||||
if COND_SMALL
|
if COND_SMALL
|
||||||
liblzma_la_SOURCES += check/crc32_small.c
|
liblzma_la_SOURCES += check/crc32_small.c
|
||||||
|
|
|
@ -19,6 +19,8 @@
|
||||||
# include "crc_x86_clmul.h"
|
# include "crc_x86_clmul.h"
|
||||||
#elif defined(CRC32_ARM64)
|
#elif defined(CRC32_ARM64)
|
||||||
# include "crc32_arm64.h"
|
# include "crc32_arm64.h"
|
||||||
|
#elif defined(CRC32_LOONGARCH)
|
||||||
|
# include "crc32_loongarch.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
// SPDX-License-Identifier: 0BSD
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
/// \file crc32_loongarch.h
|
||||||
|
/// \brief CRC32 calculation with LoongArch optimization
|
||||||
|
//
|
||||||
|
// Authors: Xi Ruoyao
|
||||||
|
// Lasse Collin
|
||||||
|
//
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifndef LZMA_CRC32_LOONGARCH_H
|
||||||
|
#define LZMA_CRC32_LOONGARCH_H
|
||||||
|
|
||||||
|
#include <larchintrin.h>
|
||||||
|
|
||||||
|
|
||||||
|
// Updates a CRC32 value over `size` bytes starting at `buf` using the
// LoongArch __crc_w_{b,h,w,d}_w intrinsics from <larchintrin.h>, which
// consume 1, 2, 4, or 8 input bytes per call.
//
// buf           Input bytes.
// size          Number of bytes to process.
// crc_unsigned  CRC32 value to continue from. It is bit-inverted on entry
//               and the result is bit-inverted again before returning.
//
// Returns the updated CRC32 value.
//
// NOTE(review): when size < 8 the alignment prologue below is skipped, so
// the tail calls to aligned_read32le()/aligned_read16le() see whatever
// alignment the caller's pointer has. Those helpers are defined outside
// this file; confirm that they tolerate unaligned pointers on this target.
static uint32_t
|
||||||
|
crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc_unsigned)
|
||||||
|
{
|
||||||
|
// The running CRC is kept as a signed 32-bit value and every data argument
// below is cast to a signed type; presumably this matches the intrinsics'
// signed parameter types (verify against larchintrin.h).
int32_t crc = (int32_t)~crc_unsigned;
|
||||||
|
|
||||||
|
// Aligning first is only done when at least one full 8-byte group exists.
if (size >= 8) {
|
||||||
|
// Number of bytes (0-7) needed to advance buf to the next 8-byte boundary.
const size_t align = (0 - (uintptr_t)buf) & 7;
|
||||||
|
|
||||||
|
// Consume 1, 2, and 4 bytes according to the bits set in `align`.
// After these three steps buf is 8-byte aligned.
if (align & 1)
|
||||||
|
crc = __crc_w_b_w((int8_t)*buf++, crc);
|
||||||
|
|
||||||
|
if (align & 2) {
|
||||||
|
crc = __crc_w_h_w((int16_t)aligned_read16le(buf), crc);
|
||||||
|
buf += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (align & 4) {
|
||||||
|
crc = __crc_w_w_w((int32_t)aligned_read32le(buf), crc);
|
||||||
|
buf += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cannot underflow: align <= 7 and size >= 8 in this branch.
size -= align;
|
||||||
|
|
||||||
|
// Main loop: process every complete 8-byte group. `limit` points just past
// the last complete group (size rounded down to a multiple of 8).
for (const uint8_t *limit = buf + (size & ~(size_t)7);
|
||||||
|
buf < limit; buf += 8)
|
||||||
|
crc = __crc_w_d_w((int64_t)aligned_read64le(buf), crc);
|
||||||
|
|
||||||
|
// Only the 0-7 leftover bytes remain for the tail handling below.
size &= 7;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tail: the remaining 0-7 bytes are consumed as an optional 4-byte,
// 2-byte, and 1-byte piece, selected by the bits of `size`.
if (size & 4) {
|
||||||
|
crc = __crc_w_w_w((int32_t)aligned_read32le(buf), crc);
|
||||||
|
buf += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (size & 2) {
|
||||||
|
crc = __crc_w_h_w((int16_t)aligned_read16le(buf), crc);
|
||||||
|
buf += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (size & 1)
|
||||||
|
crc = __crc_w_b_w((int8_t)*buf, crc);
|
||||||
|
|
||||||
|
// Undo the inversion applied on entry.
return (uint32_t)~crc;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // LZMA_CRC32_LOONGARCH_H
|
|
@ -83,6 +83,9 @@ extern const uint64_t lzma_crc64_table[4][256];
|
||||||
// CRC64 could be done with CLMUL but it's not implemented yet.
|
// CRC64 could be done with CLMUL but it's not implemented yet.
|
||||||
#undef CRC32_ARM64
|
#undef CRC32_ARM64
|
||||||
|
|
||||||
|
// 64-bit LoongArch has CRC32 instructions.
|
||||||
|
#undef CRC32_LOONGARCH
|
||||||
|
|
||||||
|
|
||||||
// ARM64
|
// ARM64
|
||||||
//
|
//
|
||||||
|
@ -112,6 +115,18 @@ extern const uint64_t lzma_crc64_table[4][256];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// LoongArch
|
||||||
|
//
|
||||||
|
// Only 64-bit LoongArch is supported for now. No runtime detection
|
||||||
|
// is needed because the LoongArch specification says that the CRC32
|
||||||
|
// instructions are a part of the Basic Integer Instructions and
|
||||||
|
// they shall be implemented by 64-bit LoongArch implementations.
|
||||||
|
#ifdef HAVE_LOONGARCH_CRC32
|
||||||
|
# define CRC32_ARCH_OPTIMIZED 1
|
||||||
|
# define CRC32_LOONGARCH 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// x86 and E2K
|
// x86 and E2K
|
||||||
#if defined(HAVE_USABLE_CLMUL)
|
#if defined(HAVE_USABLE_CLMUL)
|
||||||
// If CLMUL is allowed unconditionally in the compiler options then
|
// If CLMUL is allowed unconditionally in the compiler options then
|
||||||
|
|
Loading…
Reference in New Issue