From 99c1c2abfae2e87f3c17e929783e6d1bb7a3f302 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Mon, 2 Feb 2009 21:19:01 +0200 Subject: [PATCH] Updated the x86 assembler code: - Use call/ret pair to get instruction pointer for PIC. - Use PIC only if PIC or __PIC__ is #defined. - The code should work on MinGW and Darwin in addition to GNU/Linux and Solaris. --- configure.ac | 6 --- src/liblzma/check/crc32_x86.S | 86 +++++++++++++++++++++++++++++------ src/liblzma/check/crc64_x86.S | 84 +++++++++++++++++++++++++++++----- 3 files changed, 146 insertions(+), 30 deletions(-) diff --git a/configure.ac b/configure.ac index 39996bcc..afd5afc1 100644 --- a/configure.ac +++ b/configure.ac @@ -307,12 +307,6 @@ if test "x$enable_assembler" = xyes; then x86_64) enable_assembler=x86_64 ;; *) enable_assembler=no ;; esac - # Darwin has different ABI than GNU+Linux and Solaris, - # and the assembler code doesn't assemble. - case $host_os in - darwin*) enable_assembler=no ;; - *) ;; - esac fi case $enable_assembler in x86) diff --git a/src/liblzma/check/crc32_x86.S b/src/liblzma/check/crc32_x86.S index 82cfb944..859311ac 100644 --- a/src/liblzma/check/crc32_x86.S +++ b/src/liblzma/check/crc32_x86.S @@ -44,12 +44,37 @@ init_table(void) * extern uint32_t lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc); */ - .text - .globl lzma_crc32 - .type lzma_crc32, @function +/* + * On some systems, the functions need to be prefixed. The prefix is + * usually an underscore. + */ +#ifndef __USER_LABEL_PREFIX__ +# define __USER_LABEL_PREFIX__ +#endif +#define MAKE_SYM_CAT(prefix, sym) prefix ## sym +#define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym) +#define LZMA_CRC32 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32) +#define LZMA_CRC32_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32_table) - .align 16 -lzma_crc32: +/* + * Solaris assembler doesn't have .p2align, and Darwin uses .align + * differently than GNU/Linux and Solaris. + */ +#ifdef __MACH__ +# define ALIGN(pow2, abs) .align pow2 +#else +# define ALIGN(pow2, abs) .align abs +#endif + + .text + .globl LZMA_CRC32 + +#if !defined(__MACH__) && !defined(_WIN32) + .type LZMA_CRC32, @function +#endif + + ALIGN(4, 16) +LZMA_CRC32: /* * Register usage: * %eax crc @@ -71,17 +96,32 @@ lzma_crc32: /* * Store the address of lzma_crc32_table to %ebx. This is needed to * get position-independent code (PIC). + * + * The PIC macro is defined by libtool, while __PIC__ is defined + * by GCC but only on some systems. Testing for both makes it simpler + * to test this code without libtool, and keeps the code working also + * when built with libtool but using something else than GCC. */ - call .L_PIC -.L_PIC: - popl %ebx - addl $_GLOBAL_OFFSET_TABLE_+[.-.L_PIC], %ebx - movl lzma_crc32_table@GOT(%ebx), %ebx +#if !defined(PIC) && !defined(__PIC__) + /* Not PIC */ + movl $LZMA_CRC32_TABLE, %ebx +#elif defined(__MACH__) + /* Mach-O */ + call .L_get_pc +.L_pic: + leal .L_lzma_crc32_table$non_lazy_ptr-.L_pic(%ebx), %ebx + movl (%ebx), %ebx +#else + /* ELF */ + call .L_get_pc + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl LZMA_CRC32_TABLE@GOT(%ebx), %ebx +#endif /* Complement the initial value. */ notl %eax - .align 16 + ALIGN(4, 16) .L_align: /* * Check if there is enough input to use slicing-by-eight. @@ -104,7 +144,7 @@ lzma_crc32: decl %edi jmp .L_align - .align 4 + ALIGN(2, 4) .L_slice: /* * If we get here, there's at least 16 bytes of aligned input @@ -214,7 +254,27 @@ lzma_crc32: popl %ebx ret - .size lzma_crc32, .-lzma_crc32 +#if defined(PIC) || defined(__PIC__) + ALIGN(4, 16) +.L_get_pc: + movl (%esp), %ebx + ret +#endif + +#if defined(__MACH__) && (defined(PIC) || defined(__PIC__)) + .section __IMPORT,__pointers,non_lazy_symbol_pointers +.L_lzma_crc32_table$non_lazy_ptr: + .indirect_symbol LZMA_CRC32_TABLE + .long 0 + +#elif defined(_WIN32) + /* This is equivalent of __declspec(dllexport). */ + .section .drectve + .ascii " -export:lzma_crc32" + +#else + .size LZMA_CRC32, .-LZMA_CRC32 +#endif /* * This is needed to support non-executable stack. It's ugly to diff --git a/src/liblzma/check/crc64_x86.S b/src/liblzma/check/crc64_x86.S index 78935e65..abd8f9d4 100644 --- a/src/liblzma/check/crc64_x86.S +++ b/src/liblzma/check/crc64_x86.S @@ -37,12 +37,37 @@ init_table(void) * extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc); */ - .text - .globl lzma_crc64 - .type lzma_crc64, @function +/* + * On some systems, the functions need to be prefixed. The prefix is + * usually an underscore. + */ +#ifndef __USER_LABEL_PREFIX__ +# define __USER_LABEL_PREFIX__ +#endif +#define MAKE_SYM_CAT(prefix, sym) prefix ## sym +#define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym) +#define LZMA_CRC64 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64) +#define LZMA_CRC64_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64_table) - .align 16 -lzma_crc64: +/* + * Solaris assembler doesn't have .p2align, and Darwin uses .align + * differently than GNU/Linux and Solaris. + */ +#ifdef __MACH__ +# define ALIGN(pow2, abs) .align pow2 +#else +# define ALIGN(pow2, abs) .align abs +#endif + + .text + .globl LZMA_CRC64 + +#if !defined(__MACH__) && !defined(_WIN32) + .type LZMA_CRC64, @function +#endif + + ALIGN(4, 16) +LZMA_CRC64: /* * Register usage: * %eax crc LSB @@ -65,12 +90,27 @@ lzma_crc64: /* * Store the address of lzma_crc64_table to %ebx. This is needed to * get position-independent code (PIC). + * + * The PIC macro is defined by libtool, while __PIC__ is defined + * by GCC but only on some systems. Testing for both makes it simpler + * to test this code without libtool, and keeps the code working also + * when built with libtool but using something else than GCC. */ - call .L_PIC -.L_PIC: - popl %ebx - addl $_GLOBAL_OFFSET_TABLE_+[.-.L_PIC], %ebx - movl lzma_crc64_table@GOT(%ebx), %ebx +#if !defined(PIC) && !defined(__PIC__) + /* Not PIC */ + movl $LZMA_CRC64_TABLE, %ebx +#elif defined(__MACH__) + /* Mach-O */ + call .L_get_pc +.L_pic: + leal .L_lzma_crc64_table$non_lazy_ptr-.L_pic(%ebx), %ebx + movl (%ebx), %ebx +#else + /* ELF */ + call .L_get_pc + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl LZMA_CRC64_TABLE@GOT(%ebx), %ebx +#endif /* Complement the initial value. */ notl %eax @@ -200,7 +240,29 @@ lzma_crc64: popl %ebx ret - .size lzma_crc64, .-lzma_crc64 +#if defined(PIC) || defined(__PIC__) + ALIGN(4, 16) +.L_get_pc: + movl (%esp), %ebx + ret +#endif + +#if defined(__MACH__) && (defined(PIC) || defined(__PIC__)) + /* Mach-O PIC */ + .section __IMPORT,__pointers,non_lazy_symbol_pointers +.L_lzma_crc64_table$non_lazy_ptr: + .indirect_symbol LZMA_CRC64_TABLE + .long 0 + +#elif defined(_WIN32) + /* This is equivalent of __declspec(dllexport). */ + .section .drectve + .ascii " -export:lzma_crc64" + +#else + /* ELF */ + .size LZMA_CRC64, .-LZMA_CRC64 +#endif /* * This is needed to support non-executable stack. It's ugly to