liblzma: Creates a separate "safe" range decoder mode.

The new "safe" range decoder mode is the same as the old range decoder,
but now the default behavior of the range decoder will not check whether
there is enough input or output to complete the operation. When the buffers
are close to fully consumed, the "safe" operations must be used instead.
This will improve speed because it will reduce the number of branches
needed for most of the range decoder operations.
This commit is contained in:
Jia Tan 2024-02-12 17:09:10 +02:00 committed by Lasse Collin
parent 7f6d9ca329
commit e446ab7a18
2 changed files with 83 additions and 104 deletions

View File

@ -25,21 +25,13 @@
#ifdef HAVE_SMALL #ifdef HAVE_SMALL
// Macros for (somewhat) size-optimized code. // Macros for (somewhat) size-optimized code.
#define seq_4(seq) seq // This is used to decode the match length (how many bytes must be repeated
// from the dictionary). This version is used in the Resumable mode and
#define seq_6(seq) seq // does not unroll any loops.
#define seq_8(seq) seq
#define seq_len(seq) \
seq ## _CHOICE, \
seq ## _CHOICE2, \
seq ## _BITTREE
#define len_decode(target, ld, pos_state, seq) \ #define len_decode(target, ld, pos_state, seq) \
do { \ do { \
case seq ## _CHOICE: \ case seq ## _CHOICE: \
rc_if_0(ld.choice, seq ## _CHOICE) { \ rc_if_0_safe(ld.choice, seq ## _CHOICE) { \
rc_update_0(ld.choice); \ rc_update_0(ld.choice); \
probs = ld.low[pos_state];\ probs = ld.low[pos_state];\
limit = LEN_LOW_SYMBOLS; \ limit = LEN_LOW_SYMBOLS; \
@ -47,7 +39,7 @@ case seq ## _CHOICE: \
} else { \ } else { \
rc_update_1(ld.choice); \ rc_update_1(ld.choice); \
case seq ## _CHOICE2: \ case seq ## _CHOICE2: \
rc_if_0(ld.choice2, seq ## _CHOICE2) { \ rc_if_0_safe(ld.choice2, seq ## _CHOICE2) { \
rc_update_0(ld.choice2); \ rc_update_0(ld.choice2); \
probs = ld.mid[pos_state]; \ probs = ld.mid[pos_state]; \
limit = LEN_MID_SYMBOLS; \ limit = LEN_MID_SYMBOLS; \
@ -63,89 +55,42 @@ case seq ## _CHOICE2: \
symbol = 1; \ symbol = 1; \
case seq ## _BITTREE: \ case seq ## _BITTREE: \
do { \ do { \
rc_bit(probs[symbol], , , seq ## _BITTREE); \ rc_bit_safe(probs[symbol], , , seq ## _BITTREE); \
} while (symbol < limit); \ } while (symbol < limit); \
target += symbol - limit; \ target += symbol - limit; \
} while (0) } while (0)
#else // HAVE_SMALL
// Unrolled versions // This is the faster version of the match length decoder that does not
#define seq_4(seq) \ // worry about being resumable. It unrolls the bittree decoding loop.
seq ## 0, \ #define len_decode_fast(target, ld, pos_state) \
seq ## 1, \
seq ## 2, \
seq ## 3
#define seq_6(seq) \
seq ## 0, \
seq ## 1, \
seq ## 2, \
seq ## 3, \
seq ## 4, \
seq ## 5
#define seq_8(seq) \
seq ## 0, \
seq ## 1, \
seq ## 2, \
seq ## 3, \
seq ## 4, \
seq ## 5, \
seq ## 6, \
seq ## 7
#define seq_len(seq) \
seq ## _CHOICE, \
seq ## _LOW0, \
seq ## _LOW1, \
seq ## _LOW2, \
seq ## _CHOICE2, \
seq ## _MID0, \
seq ## _MID1, \
seq ## _MID2, \
seq ## _HIGH0, \
seq ## _HIGH1, \
seq ## _HIGH2, \
seq ## _HIGH3, \
seq ## _HIGH4, \
seq ## _HIGH5, \
seq ## _HIGH6, \
seq ## _HIGH7
#define len_decode(target, ld, pos_state, seq) \
do { \ do { \
symbol = 1; \ symbol = 1; \
case seq ## _CHOICE: \ rc_if_0(ld.choice) { \
rc_if_0(ld.choice, seq ## _CHOICE) { \
rc_update_0(ld.choice); \ rc_update_0(ld.choice); \
rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \ rc_bit(ld.low[pos_state][symbol], , ); \
rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \ rc_bit(ld.low[pos_state][symbol], , ); \
rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \ rc_bit(ld.low[pos_state][symbol], , ); \
target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \ target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
} else { \ } else { \
rc_update_1(ld.choice); \ rc_update_1(ld.choice); \
case seq ## _CHOICE2: \ rc_if_0(ld.choice2) { \
rc_if_0(ld.choice2, seq ## _CHOICE2) { \
rc_update_0(ld.choice2); \ rc_update_0(ld.choice2); \
rc_bit_case(ld.mid[pos_state][symbol], , , \ rc_bit(ld.mid[pos_state][symbol], , ); \
seq ## _MID0); \ rc_bit(ld.mid[pos_state][symbol], , ); \
rc_bit_case(ld.mid[pos_state][symbol], , , \ rc_bit(ld.mid[pos_state][symbol], , ); \
seq ## _MID1); \
rc_bit_case(ld.mid[pos_state][symbol], , , \
seq ## _MID2); \
target = symbol - LEN_MID_SYMBOLS \ target = symbol - LEN_MID_SYMBOLS \
+ MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
} else { \ } else { \
rc_update_1(ld.choice2); \ rc_update_1(ld.choice2); \
rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \ rc_bit(ld.high[symbol], , ); \
rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \ rc_bit(ld.high[symbol], , ); \
rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \ rc_bit(ld.high[symbol], , ); \
rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \ rc_bit(ld.high[symbol], , ); \
rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \ rc_bit(ld.high[symbol], , ); \
rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \ rc_bit(ld.high[symbol], , ); \
rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \ rc_bit(ld.high[symbol], , ); \
rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \ rc_bit(ld.high[symbol], , ); \
target = symbol - LEN_HIGH_SYMBOLS \ target = symbol - LEN_HIGH_SYMBOLS \
+ MATCH_LEN_MIN \ + MATCH_LEN_MIN \
+ LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
@ -153,8 +98,6 @@ case seq ## _CHOICE2: \
} \ } \
} while (0) } while (0)
#endif // HAVE_SMALL
/// Length decoder probabilities; see comments in lzma_common.h. /// Length decoder probabilities; see comments in lzma_common.h.
typedef struct { typedef struct {
@ -889,7 +832,6 @@ out:
} }
static void static void
lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size, lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
bool allow_eopm) bool allow_eopm)

View File

@ -80,10 +80,22 @@ do { \
((range_decoder).code == 0) ((range_decoder).code == 0)
/// Read the next input byte if needed. If more input is needed but there is // Read the next input byte if needed.
#define rc_normalize() \
do { \
if (rc.range < RC_TOP_VALUE) { \
rc.range <<= RC_SHIFT_BITS; \
rc.code = (rc.code << RC_SHIFT_BITS) | in[rc_in_pos++]; \
} \
} while (0)
/// If more input is needed but there is
/// no more input available, "goto out" is used to jump out of the main /// no more input available, "goto out" is used to jump out of the main
/// decoder loop. /// decoder loop. The "_safe" macros are used in the Resumable decoder
#define rc_normalize(seq) \ /// mode in order to save the sequence to continue decoding from that
/// point later.
#define rc_normalize_safe(seq) \
do { \ do { \
if (rc.range < RC_TOP_VALUE) { \ if (rc.range < RC_TOP_VALUE) { \
if (unlikely(rc_in_pos == in_size)) { \ if (unlikely(rc_in_pos == in_size)) { \
@ -99,7 +111,7 @@ do { \
/// Start decoding a bit. This must be used together with rc_update_0() /// Start decoding a bit. This must be used together with rc_update_0()
/// and rc_update_1(): /// and rc_update_1():
/// ///
/// rc_if_0(prob, seq) { /// rc_if_0(prob) {
/// rc_update_0(prob); /// rc_update_0(prob);
/// // Do something /// // Do something
/// } else { /// } else {
@ -107,8 +119,14 @@ do { \
/// // Do something else /// // Do something else
/// } /// }
/// ///
#define rc_if_0(prob, seq) \ #define rc_if_0(prob) \
rc_normalize(seq); \ rc_normalize(); \
rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \
if (rc.code < rc_bound)
#define rc_if_0_safe(prob, seq) \
rc_normalize_safe(seq); \
rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \ rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \
if (rc.code < rc_bound) if (rc.code < rc_bound)
@ -136,9 +154,21 @@ do { \
/// This macro is used as the last step in bittree reverse decoders since /// This macro is used as the last step in bittree reverse decoders since
/// those don't use "symbol" for anything else than indexing the probability /// those don't use "symbol" for anything else than indexing the probability
/// arrays. /// arrays.
#define rc_bit_last(prob, action0, action1, seq) \ #define rc_bit_last(prob, action0, action1) \
do { \ do { \
rc_if_0(prob, seq) { \ rc_if_0(prob) { \
rc_update_0(prob); \
action0; \
} else { \
rc_update_1(prob); \
action1; \
} \
} while (0)
#define rc_bit_last_safe(prob, action0, action1, seq) \
do { \
rc_if_0_safe(prob, seq) { \
rc_update_0(prob); \ rc_update_0(prob); \
action0; \ action0; \
} else { \ } else { \
@ -150,26 +180,33 @@ do { \
/// Decodes one bit, updates "symbol", and runs action0 or action1 depending /// Decodes one bit, updates "symbol", and runs action0 or action1 depending
/// on the decoded bit. /// on the decoded bit.
#define rc_bit(prob, action0, action1, seq) \ #define rc_bit(prob, action0, action1) \
rc_bit_last(prob, \ rc_bit_last(prob, \
symbol <<= 1; action0, \
symbol = (symbol << 1) + 1; action1);
#define rc_bit_safe(prob, action0, action1, seq) \
rc_bit_last_safe(prob, \
symbol <<= 1; action0, \ symbol <<= 1; action0, \
symbol = (symbol << 1) + 1; action1, \ symbol = (symbol << 1) + 1; action1, \
seq); seq);
/// Like rc_bit() but add "case seq:" as a prefix. This makes the unrolled
/// loops more readable because the code isn't littered with "case"
/// statements. On the other hand this also makes it less readable, since
/// spotting the places where the decoder loop may be restarted is less
/// obvious.
#define rc_bit_case(prob, action0, action1, seq) \
case seq: rc_bit(prob, action0, action1, seq)
/// Decode a bit without using a probability. /// Decode a bit without using a probability.
#define rc_direct(dest, seq) \ #define rc_direct(dest) \
do { \ do { \
rc_normalize(seq); \ rc_normalize(); \
rc.range >>= 1; \
rc.code -= rc.range; \
rc_bound = UINT32_C(0) - (rc.code >> 31); \
rc.code += rc.range & rc_bound; \
dest = (dest << 1) + (rc_bound + 1); \
} while (0)
#define rc_direct_safe(dest, seq) \
do { \
rc_normalize_safe(seq); \
rc.range >>= 1; \ rc.range >>= 1; \
rc.code -= rc.range; \ rc.code -= rc.range; \
rc_bound = UINT32_C(0) - (rc.code >> 31); \ rc_bound = UINT32_C(0) - (rc.code >> 31); \