From c5f68b5cc79085a87f950fea53843e27f328068e Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Fri, 2 Oct 2009 11:03:26 +0300 Subject: [PATCH] Make liblzma produce the same output on both endiannesses. It seems that it is a problem in some cases if the same version of XZ Utils produces different output on different endiannesses, so this commit fixes that problem. The output will still vary between different XZ Utils versions, but I cannot avoid that for now. This commit bloats the code on big endian systems by 1 KiB, which should be OK since liblzma is bloated already. ;-) --- src/liblzma/check/crc32_tablegen.c | 30 ++++++++++++ src/liblzma/lz/Makefile.inc | 1 + src/liblzma/lz/lz_encoder.c | 7 ++- src/liblzma/lz/lz_encoder_hash.h | 35 ++++++++----- src/liblzma/lz/lz_encoder_hash_table.h | 68 ++++++++++++++++++++++++++ src/liblzma/lz/lz_encoder_mf.c | 1 - 6 files changed, 128 insertions(+), 14 deletions(-) create mode 100644 src/liblzma/lz/lz_encoder_hash_table.h diff --git a/src/liblzma/check/crc32_tablegen.c b/src/liblzma/check/crc32_tablegen.c index 0cbfecd8..56bc5c7f 100644 --- a/src/liblzma/check/crc32_tablegen.c +++ b/src/liblzma/check/crc32_tablegen.c @@ -5,6 +5,7 @@ /// /// Compiling: gcc -std=c99 -o crc32_tablegen crc32_tablegen.c /// Add -DWORDS_BIGENDIAN to generate big endian table. +/// Add -DLZ_HASH_TABLE to generate lz_encoder_hash_table.h (little endian). // // Author: Lasse Collin // @@ -82,10 +83,39 @@ print_crc32_table(void) } +static void +print_lz_table(void) +{ + printf("/* This file has been automatically generated by " + "crc32_tablegen.c. */\n\n" + "const uint32_t lzma_lz_hash_table[256] = {"); + + for (size_t b = 0; b < 256; ++b) { + if ((b % 4) == 0) + printf("\n\t"); + + printf("0x%08" PRIX32, crc32_table[0][b]); + + if (b != 255) + printf(",%s", (b+1) % 4 == 0 ? 
"" : " "); + } + + printf("\n};\n"); + + return; +} + + int main(void) { init_crc32_table(); + +#ifdef LZ_HASH_TABLE + print_lz_table(); +#else print_crc32_table(); +#endif + return 0; } diff --git a/src/liblzma/lz/Makefile.inc b/src/liblzma/lz/Makefile.inc index 470d59c0..75742a84 100644 --- a/src/liblzma/lz/Makefile.inc +++ b/src/liblzma/lz/Makefile.inc @@ -10,6 +10,7 @@ liblzma_la_SOURCES += \ lz/lz_encoder.c \ lz/lz_encoder.h \ lz/lz_encoder_hash.h \ + lz/lz_encoder_hash_table.h \ lz/lz_encoder_mf.c endif diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c index 5fd03e84..0e7b7d1d 100644 --- a/src/liblzma/lz/lz_encoder.c +++ b/src/liblzma/lz/lz_encoder.c @@ -13,7 +13,12 @@ #include "lz_encoder.h" #include "lz_encoder_hash.h" -#include "check.h" + +// See lz_encoder_hash.h. This is a bit hackish but avoids making +// endianness a conditional in makefiles. +#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL) +# include "lz_encoder_hash_table.h" +#endif struct lzma_coder_s { diff --git a/src/liblzma/lz/lz_encoder_hash.h b/src/liblzma/lz/lz_encoder_hash.h index dd4b1f19..e92251d2 100644 --- a/src/liblzma/lz/lz_encoder_hash.h +++ b/src/liblzma/lz/lz_encoder_hash.h @@ -13,6 +13,17 @@ #ifndef LZMA_LZ_ENCODER_HASH_H #define LZMA_LZ_ENCODER_HASH_H +#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL) + // This is to make liblzma produce the same output on big endian + // systems that it does on little endian systems. lz_encoder.c + // takes care of including the actual table. 
+ extern const uint32_t lzma_lz_hash_table[256]; +# define hash_table lzma_lz_hash_table +#else +# include "check.h" +# define hash_table lzma_crc32_table[0] +#endif + #define HASH_2_SIZE (UINT32_C(1) << 10) #define HASH_3_SIZE (UINT32_C(1) << 16) #define HASH_4_SIZE (UINT32_C(1) << 20) @@ -36,31 +47,31 @@ #endif #define hash_3_calc() \ - const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ const uint32_t hash_2_value = temp & HASH_2_MASK; \ const uint32_t hash_value \ = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask #define hash_4_calc() \ - const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ const uint32_t hash_2_value = temp & HASH_2_MASK; \ const uint32_t hash_3_value \ = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \ - ^ (lzma_crc32_table[0][cur[3]] << 5)) & mf->hash_mask + ^ (hash_table[cur[3]] << 5)) & mf->hash_mask // The following are not currently used. 
#define hash_5_calc() \ - const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ const uint32_t hash_2_value = temp & HASH_2_MASK; \ const uint32_t hash_3_value \ = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \ uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) ^ \ - ^ lzma_crc32_table[0][cur[3]] << 5); \ + ^ hash_table[cur[3]] << 5); \ const uint32_t hash_value \ - = (hash_4_value ^ (lzma_crc32_table[0][cur[4]] << 3)) \ + = (hash_4_value ^ (hash_table[cur[4]] << 3)) \ & mf->hash_mask; \ hash_4_value &= HASH_4_MASK @@ -68,30 +79,30 @@ #define hash_zip_calc() \ const uint32_t hash_value \ = (((uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8)) \ - ^ lzma_crc32_table[0][cur[2]]) & 0xFFFF + ^ hash_table[cur[2]]) & 0xFFFF */ #define hash_zip_calc() \ const uint32_t hash_value \ = (((uint32_t)(cur[2]) | ((uint32_t)(cur[0]) << 8)) \ - ^ lzma_crc32_table[0][cur[1]]) & 0xFFFF + ^ hash_table[cur[1]]) & 0xFFFF #define mt_hash_2_calc() \ const uint32_t hash_2_value \ - = (lzma_crc32_table[0][cur[0]] ^ cur[1]) & HASH_2_MASK + = (hash_table[cur[0]] ^ cur[1]) & HASH_2_MASK #define mt_hash_3_calc() \ - const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ const uint32_t hash_2_value = temp & HASH_2_MASK; \ const uint32_t hash_3_value \ = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK #define mt_hash_4_calc() \ - const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ const uint32_t hash_2_value = temp & HASH_2_MASK; \ const uint32_t hash_3_value \ = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \ const uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) ^ \ - (lzma_crc32_table[0][cur[3]] << 5)) & HASH_4_MASK + (hash_table[cur[3]] << 5)) & HASH_4_MASK #endif diff --git a/src/liblzma/lz/lz_encoder_hash_table.h b/src/liblzma/lz/lz_encoder_hash_table.h new file mode 100644 index 
00000000..8c51717d --- /dev/null +++ b/src/liblzma/lz/lz_encoder_hash_table.h @@ -0,0 +1,68 @@ +/* This file has been automatically generated by crc32_tablegen.c. */ + +const uint32_t lzma_lz_hash_table[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 
0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D +}; diff --git a/src/liblzma/lz/lz_encoder_mf.c b/src/liblzma/lz/lz_encoder_mf.c index e48de4fe..b31b0857 100644 --- a/src/liblzma/lz/lz_encoder_mf.c +++ b/src/liblzma/lz/lz_encoder_mf.c @@ -13,7 +13,6 @@ #include "lz_encoder.h" #include "lz_encoder_hash.h" -#include "check.h" /// \brief Find matches starting from the current byte