diff --git a/cmake/tuklib_integer.cmake b/cmake/tuklib_integer.cmake index e2d6c71d..c2cd04e4 100644 --- a/cmake/tuklib_integer.cmake +++ b/cmake/tuklib_integer.cmake @@ -159,24 +159,16 @@ function(tuklib_integer TARGET_OR_ALL) set(FAST_UNALIGNED_GUESS ON) endif() - elseif(PROCESSOR MATCHES "^arm|^aarch64|^riscv") - # On 32-bit and 64-bit ARM, GCC and Clang - # #define __ARM_FEATURE_UNALIGNED if - # unaligned access is supported. - # - # Exception: GCC at least up to 13.2.0 - # defines it even when using -mstrict-align - # so in that case this autodetection goes wrong. - # Most of the time -mstrict-align isn't used so it - # shouldn't be a common problem in practice. See: - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111555 + elseif(PROCESSOR MATCHES "^arm|^riscv" AND NOT PROCESSOR MATCHES "^arm64") + # On 32-bit ARM, GCC and Clang # #define __ARM_FEATURE_UNALIGNED + # if and only if unaligned access is supported. # # RISC-V C API Specification says that if # __riscv_misaligned_fast is defined then # unaligned access is known to be fast. # # MSVC is handled as a special case: We assume that - # 32/64-bit ARM supports fast unaligned access. + # 32-bit ARM supports fast unaligned access. # If MSVC gets RISC-V support then this will assume # fast unaligned access on RISC-V too. check_c_source_compiles(" @@ -192,6 +184,53 @@ function(tuklib_integer TARGET_OR_ALL) set(FAST_UNALIGNED_GUESS ON) endif() + elseif(PROCESSOR MATCHES "^aarch64|^arm64") + # On ARM64, Clang defines __ARM_FEATURE_UNALIGNED if and only if + # unaligned access is supported. However, GCC (at least up to 15.2.0) + # defines it even when using -mstrict-align, so autodetection with + # this macro doesn't work with GCC on ARM64. (It does work on + # 32-bit ARM.) See: + # + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111555 + # + # We need three checks: + # + # 1. If __ARM_FEATURE_UNALIGNED is defined and the + # compiler isn't GCC, unaligned access is enabled. + # If the compiler is MSVC, unaligned access is + # enabled even without __ARM_FEATURE_UNALIGNED. + check_c_source_compiles(" + #if defined(__ARM_FEATURE_UNALIGNED) \ + && (!defined(__GNUC__) || defined(__clang__)) + #elif defined(_MSC_VER) + #else + compile error + #endif + int main(void) { return 0; } + " + TUKLIB_FAST_UNALIGNED_DEFINED_BY_PREPROCESSOR) + if(TUKLIB_FAST_UNALIGNED_DEFINED_BY_PREPROCESSOR) + set(FAST_UNALIGNED_GUESS ON) + else() + # 2. If __ARM_FEATURE_UNALIGNED is not defined, + # unaligned access is disabled. + check_c_source_compiles(" + #ifdef __ARM_FEATURE_UNALIGNED + compile error + #endif + int main(void) { return 0; } + " + TUKLIB_FAST_UNALIGNED_NOT_DEFINED_BY_PREPROCESSOR) + if(NOT TUKLIB_FAST_UNALIGNED_NOT_DEFINED_BY_PREPROCESSOR) + # 3. Use heuristics to detect if -mstrict-align is + # in effect when building with GCC. + tuklib_integer_internal_strict_align("[ \t]ldrb[ \t]") + if(NOT TUKLIB_INTEGER_STRICT_ALIGN) + set(FAST_UNALIGNED_GUESS ON) + endif() + endif() + endif() + elseif(PROCESSOR MATCHES "^loongarch") tuklib_integer_internal_strict_align("[ \t]ld\\.bu[ \t]") if(NOT TUKLIB_INTEGER_STRICT_ALIGN) diff --git a/m4/tuklib_integer.m4 b/m4/tuklib_integer.m4 index a3128a20..29f2c95f 100644 --- a/m4/tuklib_integer.m4 +++ b/m4/tuklib_integer.m4 @@ -130,34 +130,74 @@ if test "x$enable_unaligned_access" = xauto ; then i?86|x86_64|powerpc|powerpc64|powerpc64le) enable_unaligned_access=yes ;; - arm*|aarch64*|riscv*) - # On 32-bit and 64-bit ARM, GCC and Clang - # #define __ARM_FEATURE_UNALIGNED if - # unaligned access is supported. - # - # Exception: GCC at least up to 13.2.0 - # defines it even when using -mstrict-align - # so in that case this autodetection goes wrong. - # Most of the time -mstrict-align isn't used so it - # shouldn't be a common problem in practice. See: - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111555 + arm*|riscv*) + # On 32-bit ARM, GCC and Clang + # #define __ARM_FEATURE_UNALIGNED + # if and only if unaligned access is supported. # # RISC-V C API Specification says that if # __riscv_misaligned_fast is defined then # unaligned access is known to be fast. # # MSVC is handled as a special case: We assume that - # 32/64-bit ARM supports fast unaligned access. + # 32-bit ARM supports fast unaligned access. # If MSVC gets RISC-V support then this will assume # fast unaligned access on RISC-V too. AC_COMPILE_IFELSE([AC_LANG_SOURCE([ -#if !defined(__ARM_FEATURE_UNALIGNED) \ - && !defined(__riscv_misaligned_fast) \ - && !defined(_MSC_VER) -compile error -#endif -int main(void) { return 0; } -])], [enable_unaligned_access=yes], [enable_unaligned_access=no]) + #if !defined(__ARM_FEATURE_UNALIGNED) \ + && !defined(__riscv_misaligned_fast) \ + && !defined(_MSC_VER) + compile error + #endif + int main(void) { return 0; } + ])], + [enable_unaligned_access=yes], + [enable_unaligned_access=no]) + ;; + aarch64*) + # On ARM64, Clang defines __ARM_FEATURE_UNALIGNED + # if and only if unaligned access is supported. + # However, GCC (at least up to 15.2.0) defines it + # even when using -mstrict-align, so autodetection + # with this macro doesn't work with GCC on ARM64. + # (It does work on 32-bit ARM.) See: + # + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111555 + # + # We need three checks: + # + # 1. If __ARM_FEATURE_UNALIGNED is defined and the + # compiler isn't GCC, unaligned access is enabled. + # If the compiler is MSVC, unaligned access is + # enabled even without __ARM_FEATURE_UNALIGNED. + AC_COMPILE_IFELSE([AC_LANG_SOURCE([ + #if defined(__ARM_FEATURE_UNALIGNED) \ + && (!defined(__GNUC__) \ + || defined(__clang__)) + #elif defined(_MSC_VER) + #else + compile error + #endif + int main(void) { return 0; } + ])], [enable_unaligned_access=yes]) + + # 2. If __ARM_FEATURE_UNALIGNED is not defined, + # unaligned access is disabled. + if test "x$enable_unaligned_access" = xauto ; then + AC_COMPILE_IFELSE([AC_LANG_SOURCE([ + #ifdef __ARM_FEATURE_UNALIGNED + compile error + #endif + int main(void) { return 0; } + ])], [enable_unaligned_access=no]) + fi + + # 3. Use heuristics to detect if -mstrict-align is + # in effect when building with GCC. + if test "x$enable_unaligned_access" = xauto ; then + [tuklib_integer_strict_align \ + '[[:blank:]]ldrb[[:blank:]]'] + fi ;; loongarch*) # See sections 7.4, 8.1, and 8.2: