From 7971566247914ec1854b125ff99c2a617f5c1e3a Mon Sep 17 00:00:00 2001
From: Lasse Collin
Date: Tue, 9 Dec 2025 12:13:36 +0200
Subject: [PATCH] Autotools: Autodetect unaligned access support on LoongArch

According to [1] sections 7.4, 8.1, and 8.2, desktop and server
processors support fast unaligned access, but embedded systems likely
don't.

It's important that TUKLIB_FAST_UNALIGNED_ACCESS isn't defined when
-mstrict-align is in use because it will result in slower binaries
even if running on a processor that supports fast unaligned access.
It's because compilers will translate multibyte memcpy() to multiple
byte-by-byte instructions instead of wider loads and stores. The
compression times from [2] show this well:

    Unaligned access  CFLAGS                 Compression time
    enabled           -O2 -mno-strict-align       66.1 s
    disabled          -O2 -mno-strict-align       79.5 s
    disabled          -O2 -mstrict-align          79.9 s
    enabled           -O2 -mstrict-align         129.1 s

There currently (GCC 15.2) is no preprocessor macro on LoongArch
to detect if -mstrict-align or -mno-strict-align is in effect (the
default is -mno-strict-align). Use heuristics to detect which of the
flags is in effect.

[1] https://github.com/loongson/la-softdev-convention/blob/v0.2/la-softdev-convention.adoc
[2] https://github.com/tukaani-project/xz/pull/186#issuecomment-3494570304

Thanks-to: Li Chenggang
Thanks-to: Xi Ruoyao
See: https://github.com/tukaani-project/xz/pull/186
---
 m4/tuklib_integer.m4 | 68 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/m4/tuklib_integer.m4 b/m4/tuklib_integer.m4
index 906ecf1e..a3128a20 100644
--- a/m4/tuklib_integer.m4
+++ b/m4/tuklib_integer.m4
@@ -62,6 +62,58 @@ main(void)
 	fi
 ])
 
+# On archs where we use tuklib_integer_strict_align() (see below), we need
+# objdump to detect support for unaligned access. (Libtool needs objdump
+# too, so Libtool does this same tool check as well.)
+AC_CHECK_TOOL([OBJDUMP], [objdump], [false])
+
+# An internal helper that attempts to detect if -mstrict-align or
+# -mno-strict-align is in effect. This sets enable_unaligned_access=yes
+# if compilation succeeds and the regex passed as an argument does *not*
+# match the objdump output of a check program. Otherwise this sets
+# enable_unaligned_access=no.
+tuklib_integer_strict_align ()
+{
+	# First guess no.
+	enable_unaligned_access=no
+
+	# Force -O2 because without optimizations the memcpy()
+	# won't be optimized out.
+	tuklib_integer_saved_CFLAGS=$CFLAGS
+	CFLAGS="$CFLAGS -O2"
+	AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+		#include <string.h>
+		unsigned int check_strict_align(const void *p)
+		{
+			unsigned int i;
+			memcpy(&i, p, sizeof(i));
+			return i;
+		}
+	]])], [
+		# Disassemble the test function from the object file.
+		if $OBJDUMP -d conftest.$ac_objext > conftest.s ; then
+			# This function should be passed a regex that
+			# matches if there are instructions that load
+			# unsigned bytes. Such instructions indicate
+			# that -mstrict-align is in effect.
+			#
+			# NOTE: Use braces to avoid M4 parameter
+			# expansion.
+			if grep -- "${1}" conftest.s > /dev/null ; then
+				:
+			else
+				# No single-byte unsigned load
+				# instructions were found,
+				# so it seems that -mno-strict-align
+				# is in effect.
+				# Override our earlier guess.
+				enable_unaligned_access=yes
+			fi
+		fi
+	])
+	CFLAGS=$tuklib_integer_saved_CFLAGS
+}
+
 AC_MSG_CHECKING([if unaligned memory access should be used])
 AC_ARG_ENABLE([unaligned-access], AS_HELP_STRING([--enable-unaligned-access],
 		[Enable if the system supports *fast* unaligned memory access
@@ -107,6 +159,22 @@ compile error
 int main(void) { return 0; }
 ])], [enable_unaligned_access=yes], [enable_unaligned_access=no])
 			;;
+		loongarch*)
+			# See sections 7.4, 8.1, and 8.2:
+			# https://github.com/loongson/la-softdev-convention/blob/v0.2/la-softdev-convention.adoc
+			#
+			# That is, desktop and server processors likely support
+			# unaligned access in hardware but embedded processors
+			# might not. GCC defaults to -mno-strict-align and so
+			# do the majority of GNU/Linux distributions. As of
+			# GCC 15.2, there is no predefined macro to detect
+			# if -mstrict-align or -mno-strict-align is in effect.
+			# Use heuristics based on compiler output.
+			[
+				tuklib_integer_strict_align \
+					'[[:blank:]]ld\.bu[[:blank:]]'
+			]
+			;;
 		*)
 			enable_unaligned_access=no
 			;;