liblzma: Fix building with NVHPC (NVIDIA HPC SDK).

The NVHPC compiler has several issues that make it impossible to
build liblzma:
  - the compiler cannot handle unions that contain pointers that
    are not the first members;
  - the compiler cannot handle the assembler code in range_decoder.h
    (LZMA_RANGE_DECODER_CONFIG has to be set to zero);
  - the compiler fails to produce valid code for delta_decode if
    vectorization is enabled, which results in failed tests.

This introduces NVHPC-specific workarounds that address the issues.

(cherry picked from commit 096bc0e3f8)
This commit is contained in:
Sergey Kosukhin 2024-03-13 13:07:13 +01:00 committed by Lasse Collin
parent 1888fb49f6
commit 403b4c78b8
3 changed files with 8 additions and 2 deletions

View File

@ -217,12 +217,14 @@ typedef struct {
uint16_t offset; uint16_t offset;
union { union {
// NVHPC has problems with unions that contain pointers that are not the first
// members
const name_value_map *map;
struct { struct {
uint32_t min; uint32_t min;
uint32_t max; uint32_t max;
} range; } range;
const name_value_map *map;
} u; } u;
} option_map; } option_map;

View File

@ -25,6 +25,9 @@ decode_buffer(lzma_delta_coder *coder, uint8_t *buffer, size_t size)
} }
#ifdef __NVCOMPILER
# pragma routine novector
#endif
static lzma_ret static lzma_ret
delta_decode(void *coder_ptr, const lzma_allocator *allocator, delta_decode(void *coder_ptr, const lzma_allocator *allocator,
const uint8_t *restrict in, size_t *restrict in_pos, const uint8_t *restrict in, size_t *restrict in_pos,

View File

@ -45,6 +45,7 @@
// and different processors. Overall 0x1F0 seems to be the best choice. // and different processors. Overall 0x1F0 seems to be the best choice.
#ifndef LZMA_RANGE_DECODER_CONFIG #ifndef LZMA_RANGE_DECODER_CONFIG
# if defined(__x86_64__) && !defined(__ILP32__) \ # if defined(__x86_64__) && !defined(__ILP32__) \
&& !defined(__NVCOMPILER) \
&& (defined(__GNUC__) || defined(__clang__)) && (defined(__GNUC__) || defined(__clang__))
# define LZMA_RANGE_DECODER_CONFIG 0x1F0 # define LZMA_RANGE_DECODER_CONFIG 0x1F0
# else # else