mirror of
				https://git.tukaani.org/xz.git
				synced 2025-10-31 13:32:56 +00:00 
			
		
		
		
	liblzma: Speed up CRC32 calculation on 64-bit LoongArch
The crc.w.{b/h/w/d}.w instructions in LoongArch can calculate the CRC32
result for 1/2/4/8 bytes in a single operation. Using these is much
faster compared to the generic method.

Optimized CRC32 is enabled unconditionally on 64-bit LoongArch because
the LoongArch specification says that CRC32 instructions shall be
implemented for 64-bit processors. Optimized CRC32 isn't enabled for
32-bit LoongArch processors because not enough information is available
about them.

Co-authored-by: Lasse Collin <lasse.collin@tukaani.org>
Closes: https://github.com/tukaani-project/xz/pull/86
			
			
This commit is contained in:
		
							parent
							
								
									0ed8936685
								
							
						
					
					
						commit
						7baf6835cf
					
				| @ -548,6 +548,7 @@ add_library(liblzma | |||||||
|     src/liblzma/check/crc_common.h |     src/liblzma/check/crc_common.h | ||||||
|     src/liblzma/check/crc_x86_clmul.h |     src/liblzma/check/crc_x86_clmul.h | ||||||
|     src/liblzma/check/crc32_arm64.h |     src/liblzma/check/crc32_arm64.h | ||||||
|  |     src/liblzma/check/crc32_loongarch.h | ||||||
|     src/liblzma/common/block_util.c |     src/liblzma/common/block_util.c | ||||||
|     src/liblzma/common/common.c |     src/liblzma/common/common.c | ||||||
|     src/liblzma/common/common.h |     src/liblzma/common/common.h | ||||||
| @ -1341,6 +1342,30 @@ if(XZ_ARM64_CRC32) | |||||||
|     endif() |     endif() | ||||||
| endif() | endif() | ||||||
| 
 | 
 | ||||||
|  | option(XZ_LOONGARCH_CRC32 | ||||||
|  |        "Use LoongArch CRC32 instructions if supported by the compiler" ON) | ||||||
|  | 
 | ||||||
|  | if(XZ_LOONGARCH_CRC32) | ||||||
|  |     # LoongArch CRC32 intrinsics are in larchintrin.h. | ||||||
|  |     # These are supported by at least GCC and Clang. | ||||||
|  |     # | ||||||
|  |     # Only 64-bit LoongArch is currently supported. | ||||||
|  |     # It doesn't need runtime detection. | ||||||
|  |     check_c_source_compiles(" | ||||||
|  |             #if !(defined(__loongarch__) && __loongarch_grlen >= 64) | ||||||
|  |             #   error | ||||||
|  |             #endif | ||||||
|  | 
 | ||||||
|  |             #include <larchintrin.h> | ||||||
|  |             int main(void) | ||||||
|  |             { | ||||||
|  |                 return __crc_w_w_w(1, 2); | ||||||
|  |             } | ||||||
|  |         " | ||||||
|  |         HAVE_LOONGARCH_CRC32) | ||||||
|  |     tuklib_add_definition_if(liblzma HAVE_LOONGARCH_CRC32) | ||||||
|  | endif() | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| # Symbol visibility support: | # Symbol visibility support: | ||||||
| # | # | ||||||
|  | |||||||
							
								
								
									
										40
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										40
									
								
								configure.ac
									
									
									
									
									
								
							| @ -394,6 +394,16 @@ AC_ARG_ENABLE([arm64-crc32], AS_HELP_STRING([--disable-arm64-crc32], | |||||||
| 	[], [enable_arm64_crc32=yes]) | 	[], [enable_arm64_crc32=yes]) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | ################################ | ||||||
|  | # LoongArch CRC32 instructions # | ||||||
|  | ################################ | ||||||
|  | 
 | ||||||
|  | AC_ARG_ENABLE([loongarch-crc32], AS_HELP_STRING([--disable-loongarch-crc32], | ||||||
|  | 		[Do not use LoongArch CRC32 instructions even if support for | ||||||
|  | 		them is detected.]), | ||||||
|  | 	[], [enable_loongarch_crc32=yes]) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| ##################### | ##################### | ||||||
| # Size optimization # | # Size optimization # | ||||||
| ##################### | ##################### | ||||||
| @ -1106,6 +1116,36 @@ AS_IF([test "x$enable_arm64_crc32" = xyes], [ | |||||||
| ]) | ]) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | # LoongArch CRC32 intrinsics are in larchintrin.h. | ||||||
|  | # These are supported by at least GCC and Clang. | ||||||
|  | # | ||||||
|  | # Only 64-bit LoongArch is currently supported. | ||||||
|  | # It doesn't need runtime detection. | ||||||
|  | AC_MSG_CHECKING([if LoongArch CRC32 instructions are usable]) | ||||||
|  | AS_IF([test "x$enable_loongarch_crc32" = xno], [ | ||||||
|  | 	AC_MSG_RESULT([no, --disable-loongarch-crc32 was used]) | ||||||
|  | ], [ | ||||||
|  | 	AC_LINK_IFELSE([AC_LANG_SOURCE([[ | ||||||
|  | #if !(defined(__loongarch__) && __loongarch_grlen >= 64) | ||||||
|  | #	error | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #include <larchintrin.h> | ||||||
|  | int main(void) | ||||||
|  | { | ||||||
|  | 	return __crc_w_w_w(1, 2); | ||||||
|  | } | ||||||
|  | 	]])], [ | ||||||
|  | 		AC_DEFINE([HAVE_LOONGARCH_CRC32], [1], [Define to 1 if | ||||||
|  | 			64-bit LoongArch CRC32 instructions are supported.]) | ||||||
|  | 		enable_loongarch_crc32=yes | ||||||
|  | 	], [ | ||||||
|  | 		enable_loongarch_crc32=no | ||||||
|  | 	]) | ||||||
|  | 	AC_MSG_RESULT([$enable_loongarch_crc32]) | ||||||
|  | ]) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| # Check for sandbox support. If one is found, set enable_sandbox=found. | # Check for sandbox support. If one is found, set enable_sandbox=found. | ||||||
| # | # | ||||||
| # About -fsanitize: Of our three sandbox methods, only Landlock is | # About -fsanitize: Of our three sandbox methods, only Landlock is | ||||||
|  | |||||||
| @ -14,7 +14,8 @@ liblzma_la_SOURCES += \ | |||||||
| 	check/check.h \ | 	check/check.h \ | ||||||
| 	check/crc_common.h \ | 	check/crc_common.h \ | ||||||
| 	check/crc_x86_clmul.h \ | 	check/crc_x86_clmul.h \ | ||||||
| 	check/crc32_arm64.h | 	check/crc32_arm64.h \ | ||||||
|  | 	check/crc32_loongarch.h | ||||||
| 
 | 
 | ||||||
| if COND_SMALL | if COND_SMALL | ||||||
| liblzma_la_SOURCES += check/crc32_small.c | liblzma_la_SOURCES += check/crc32_small.c | ||||||
|  | |||||||
| @ -19,6 +19,8 @@ | |||||||
| #	include "crc_x86_clmul.h" | #	include "crc_x86_clmul.h" | ||||||
| #elif defined(CRC32_ARM64) | #elif defined(CRC32_ARM64) | ||||||
| #	include "crc32_arm64.h" | #	include "crc32_arm64.h" | ||||||
|  | #elif defined(CRC32_LOONGARCH) | ||||||
|  | #	include "crc32_loongarch.h" | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
										65
									
								
								src/liblzma/check/crc32_loongarch.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								src/liblzma/check/crc32_loongarch.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,65 @@ | |||||||
|  | // SPDX-License-Identifier: 0BSD
 | ||||||
|  | 
 | ||||||
|  | ///////////////////////////////////////////////////////////////////////////////
 | ||||||
|  | //
 | ||||||
|  | /// \file       crc32_loongarch.h
 | ||||||
|  | /// \brief      CRC32 calculation with LoongArch optimization
 | ||||||
|  | //
 | ||||||
|  | //  Authors:    Xi Ruoyao
 | ||||||
|  | //              Lasse Collin
 | ||||||
|  | //
 | ||||||
|  | ///////////////////////////////////////////////////////////////////////////////
 | ||||||
|  | 
 | ||||||
|  | #ifndef LZMA_CRC32_LOONGARCH_H | ||||||
|  | #define LZMA_CRC32_LOONGARCH_H | ||||||
|  | 
 | ||||||
|  | #include <larchintrin.h> | ||||||
|  | // Feeds size bytes at buf through the LoongArch __crc_w_{b,h,w,d}_w intrinsics (1/2/4/8 bytes per call).
 | ||||||
|  | // crc_unsigned is bit-inverted before use and the result is bit-inverted again before it is returned.
 | ||||||
|  | static uint32_t | ||||||
|  | crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc_unsigned) | ||||||
|  | { | ||||||
|  | 	int32_t crc = (int32_t)~crc_unsigned; | ||||||
|  | 	// Inputs of 8+ bytes: consume 0-7 leading bytes so buf becomes 8-byte aligned, then go 8 bytes at a time.
 | ||||||
|  | 	if (size >= 8) { | ||||||
|  | 		const size_t align = (0 - (uintptr_t)buf) & 7; | ||||||
|  | 		// The set bits of align select a 1-, 2- and/or 4-byte step; each step leaves buf aligned for the next one.
 | ||||||
|  | 		if (align & 1) | ||||||
|  | 			crc = __crc_w_b_w((int8_t)*buf++, crc); | ||||||
|  | 
 | ||||||
|  | 		if (align & 2) { | ||||||
|  | 			crc = __crc_w_h_w((int16_t)aligned_read16le(buf), crc); | ||||||
|  | 			buf += 2; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if (align & 4) { | ||||||
|  | 			crc = __crc_w_w_w((int32_t)aligned_read32le(buf), crc); | ||||||
|  | 			buf += 4; | ||||||
|  | 		} | ||||||
|  | 		// align is at most 7 and size is at least 8 here, so this subtraction cannot wrap.
 | ||||||
|  | 		size -= align; | ||||||
|  | 		// Main loop: limit is buf plus size rounded down to a multiple of 8; one aligned_read64le() per iteration.
 | ||||||
|  | 		for (const uint8_t *limit = buf + (size & ~(size_t)7); | ||||||
|  | 				buf < limit; buf += 8) | ||||||
|  | 			crc = __crc_w_d_w((int64_t)aligned_read64le(buf), crc); | ||||||
|  | 		// Only the 0-7 leftover bytes remain for the tail below.
 | ||||||
|  | 		size &= 7; | ||||||
|  | 	} | ||||||
|  | 	// Tail (this is the whole input when size was < 8): 4, then 2, then 1 byte, as selected by the bits of size.
 | ||||||
|  | 	if (size & 4) { | ||||||
|  | 		crc = __crc_w_w_w((int32_t)aligned_read32le(buf), crc); | ||||||
|  | 		buf += 4; | ||||||
|  | 	} | ||||||
|  | 	// NOTE(review): if size was < 8 on entry, buf was never aligned; confirm the aligned_read*le() calls in this tail tolerate that.
 | ||||||
|  | 	if (size & 2) { | ||||||
|  | 		crc = __crc_w_h_w((int16_t)aligned_read16le(buf), crc); | ||||||
|  | 		buf += 2; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (size & 1) | ||||||
|  | 		crc = __crc_w_b_w((int8_t)*buf, crc); | ||||||
|  | 	// Final bit inversion, mirroring the one applied to crc_unsigned on entry.
 | ||||||
|  | 	return (uint32_t)~crc; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #endif // LZMA_CRC32_LOONGARCH_H
 | ||||||
| @ -83,6 +83,9 @@ extern const uint64_t lzma_crc64_table[4][256]; | |||||||
| // CRC64 could be done with CLMUL but it's not implemented yet.
 | // CRC64 could be done with CLMUL but it's not implemented yet.
 | ||||||
| #undef CRC32_ARM64 | #undef CRC32_ARM64 | ||||||
| 
 | 
 | ||||||
|  | // 64-bit LoongArch has CRC32 instructions.
 | ||||||
|  | #undef CRC32_LOONGARCH | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| // ARM64
 | // ARM64
 | ||||||
| //
 | //
 | ||||||
| @ -112,6 +115,18 @@ extern const uint64_t lzma_crc64_table[4][256]; | |||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | // LoongArch
 | ||||||
|  | //
 | ||||||
|  | // Only 64-bit LoongArch is supported for now. No runtime detection
 | ||||||
|  | // is needed because the LoongArch specification says that the CRC32
 | ||||||
|  | // instructions are a part of the Basic Integer Instructions and
 | ||||||
|  | // they shall be implemented by 64-bit LoongArch implementations.
 | ||||||
|  | #ifdef HAVE_LOONGARCH_CRC32 | ||||||
|  | #	define CRC32_ARCH_OPTIMIZED 1 | ||||||
|  | #	define CRC32_LOONGARCH 1 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| // x86 and E2K
 | // x86 and E2K
 | ||||||
| #if defined(HAVE_USABLE_CLMUL) | #if defined(HAVE_USABLE_CLMUL) | ||||||
| 	// If CLMUL is allowed unconditionally in the compiler options then
 | 	// If CLMUL is allowed unconditionally in the compiler options then
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in new issue
	
	Block a user