From bfde3b24a5ae25ce53c854762b6148952386b025 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Tue, 11 Mar 2008 15:35:34 +0200 Subject: [PATCH] Apply a minor speed optimization to LZMA decoder. --- src/liblzma/lzma/lzma_decoder.c | 85 +++++++++++++++++---------------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c index 9e1226f7..fce9594a 100644 --- a/src/liblzma/lzma/lzma_decoder.c +++ b/src/liblzma/lzma/lzma_decoder.c @@ -179,43 +179,41 @@ decode_dummy(const lzma_coder *restrict coder, coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0)); uint32_t symbol = 1; - if (!is_char_state(state)) { - // Decode literal with match byte. + if (is_char_state(state)) { + // Decode literal without match byte. + do { + if_bit_0(subcoder[symbol]) { + update_bit_0_dummy(); + symbol <<= 1; + } else { + update_bit_1_dummy(); + symbol = (symbol << 1) | 1; + } + } while (symbol < 0x100); - assert(rep0 != UINT32_MAX); + } else { + // Decode literal with match byte. uint32_t match_byte = lz_get_byte(coder->lz, rep0); + uint32_t subcoder_offset = 0x100; do { match_byte <<= 1; - const uint32_t match_bit = match_byte & 0x100; - const uint32_t subcoder_index = 0x100 + match_bit + symbol; + const uint32_t match_bit = match_byte & subcoder_offset; + const uint32_t subcoder_index + = subcoder_offset + match_bit + symbol; if_bit_0(subcoder[subcoder_index]) { update_bit_0_dummy(); symbol <<= 1; - if (match_bit != 0) - break; + subcoder_offset &= ~match_bit; } else { update_bit_1_dummy(); symbol = (symbol << 1) | 1; - if (match_bit == 0) - break; + subcoder_offset &= match_bit; } } while (symbol < 0x100); } - // Decode literal without match byte. This is also - // the tail of the with-match-byte function. - while (symbol < 0x100) { - if_bit_0(subcoder[symbol]) { - update_bit_0_dummy(); - symbol <<= 1; - } else { - update_bit_1_dummy(); - symbol = (symbol << 1) | 1; - } - } - break; } @@ -366,43 +364,46 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, now_pos, lz_get_byte(coder->lz, 0)); uint32_t symbol = 1; - if (!is_char_state(state)) { - // Decode literal with match byte. + if (is_char_state(state)) { + // Decode literal without match byte. + do { + if_bit_0(subcoder[symbol]) { + update_bit_0(subcoder[symbol]); + symbol <<= 1; + } else { + update_bit_1(subcoder[symbol]); + symbol = (symbol << 1) | 1; + } + } while (symbol < 0x100); - assert(rep0 != UINT32_MAX); + } else { + // Decode literal with match byte. + // + // The usage of subcoder_offset allows omitting some + // branches, which should give tiny speed improvement on + // some CPUs. subcoder_offset gets set to zero if match_bit + // didn't match. uint32_t match_byte = lz_get_byte(coder->lz, rep0); + uint32_t subcoder_offset = 0x100; do { match_byte <<= 1; - const uint32_t match_bit = match_byte & 0x100; - const uint32_t subcoder_index = 0x100 + match_bit + symbol; + const uint32_t match_bit = match_byte & subcoder_offset; + const uint32_t subcoder_index + = subcoder_offset + match_bit + symbol; if_bit_0(subcoder[subcoder_index]) { update_bit_0(subcoder[subcoder_index]); symbol <<= 1; - if (match_bit != 0) - break; + subcoder_offset &= ~match_bit; } else { update_bit_1(subcoder[subcoder_index]); symbol = (symbol << 1) | 1; - if (match_bit == 0) - break; + subcoder_offset &= match_bit; } } while (symbol < 0x100); } - // Decode literal without match byte. This is also - // the tail of the with-match-byte function. - while (symbol < 0x100) { - if_bit_0(subcoder[symbol]) { - update_bit_0(subcoder[symbol]); - symbol <<= 1; - } else { - update_bit_1(subcoder[symbol]); - symbol = (symbol << 1) | 1; - } - } - // Put the decoded byte to the dictionary, update the // decoder state, and start a new decoding loop. coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol);