liblzma: Creates separate "safe" range decoder mode.

The new "safe" range decoder mode is the same as old range decoder, but now the default behavior of the range decoder will not check if there is enough input or output to complete the operation. When the buffers are close to fully consumed, the "safe" operations must be used instead. This will improve speed because it will reduce the number of branches needed for most of the range decoder operations.
2026-03-14 21:58:13 +00:00 · 2024-02-12 17:09:10 +02:00 · 2024-02-12 17:09:10 +02:00 · e446ab7a18
commit e446ab7a18
parent 7f6d9ca329
2 changed files with 83 additions and 104 deletions
--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@ -25,21 +25,13 @@
 #ifdef HAVE_SMALL

 // Macros for (somewhat) size-optimized code.
-#define seq_4(seq) seq
-
-#define seq_6(seq) seq
-
-#define seq_8(seq) seq
-
-#define seq_len(seq) \
-	seq ## _CHOICE, \
-	seq ## _CHOICE2, \
-	seq ## _BITTREE
-
+// This is used to decode the match length (how many bytes must be repeated
+// from the dictionary). This version is used in the Resumable mode and
+// does not unroll any loops.
 #define len_decode(target, ld, pos_state, seq) \
 do { \
 case seq ## _CHOICE: \
-	rc_if_0(ld.choice, seq ## _CHOICE) { \
+	rc_if_0_safe(ld.choice, seq ## _CHOICE) { \
 		rc_update_0(ld.choice); \
 		probs = ld.low[pos_state];\
 		limit = LEN_LOW_SYMBOLS; \
@ -47,7 +39,7 @@ case seq ## _CHOICE: \
 	} else { \
 		rc_update_1(ld.choice); \
 case seq ## _CHOICE2: \
-		rc_if_0(ld.choice2, seq ## _CHOICE2) { \
+		rc_if_0_safe(ld.choice2, seq ## _CHOICE2) { \
 			rc_update_0(ld.choice2); \
 			probs = ld.mid[pos_state]; \
 			limit = LEN_MID_SYMBOLS; \
@ -63,89 +55,42 @@ case seq ## _CHOICE2: \
 	symbol = 1; \
 case seq ## _BITTREE: \
 	do { \
-		rc_bit(probs[symbol], , , seq ## _BITTREE); \
+		rc_bit_safe(probs[symbol], , , seq ## _BITTREE); \
 	} while (symbol < limit); \
 	target += symbol - limit; \
 } while (0)

-#else // HAVE_SMALL

-// Unrolled versions
-#define seq_4(seq) \
-	seq ## 0, \
-	seq ## 1, \
-	seq ## 2, \
-	seq ## 3
-
-#define seq_6(seq) \
-	seq ## 0, \
-	seq ## 1, \
-	seq ## 2, \
-	seq ## 3, \
-	seq ## 4, \
-	seq ## 5
-
-#define seq_8(seq) \
-	seq ## 0, \
-	seq ## 1, \
-	seq ## 2, \
-	seq ## 3, \
-	seq ## 4, \
-	seq ## 5, \
-	seq ## 6, \
-	seq ## 7
-
-#define seq_len(seq) \
-	seq ## _CHOICE, \
-	seq ## _LOW0, \
-	seq ## _LOW1, \
-	seq ## _LOW2, \
-	seq ## _CHOICE2, \
-	seq ## _MID0, \
-	seq ## _MID1, \
-	seq ## _MID2, \
-	seq ## _HIGH0, \
-	seq ## _HIGH1, \
-	seq ## _HIGH2, \
-	seq ## _HIGH3, \
-	seq ## _HIGH4, \
-	seq ## _HIGH5, \
-	seq ## _HIGH6, \
-	seq ## _HIGH7
-
-#define len_decode(target, ld, pos_state, seq) \
+// This is the faster version of the match length decoder that does not
+// worry about being resumable. It unrolls the bittree decoding loop.
+#define len_decode_fast(target, ld, pos_state) \
 do { \
 	symbol = 1; \
-case seq ## _CHOICE: \
-	rc_if_0(ld.choice, seq ## _CHOICE) { \
+	rc_if_0(ld.choice) { \
 		rc_update_0(ld.choice); \
-		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
-		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
-		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
+		rc_bit(ld.low[pos_state][symbol], , ); \
+		rc_bit(ld.low[pos_state][symbol], , ); \
+		rc_bit(ld.low[pos_state][symbol], , ); \
 		target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
 	} else { \
 		rc_update_1(ld.choice); \
-case seq ## _CHOICE2: \
-		rc_if_0(ld.choice2, seq ## _CHOICE2) { \
+		rc_if_0(ld.choice2) { \
 			rc_update_0(ld.choice2); \
-			rc_bit_case(ld.mid[pos_state][symbol], , , \
-					seq ## _MID0); \
-			rc_bit_case(ld.mid[pos_state][symbol], , , \
-					seq ## _MID1); \
-			rc_bit_case(ld.mid[pos_state][symbol], , , \
-					seq ## _MID2); \
+			rc_bit(ld.mid[pos_state][symbol], , ); \
+			rc_bit(ld.mid[pos_state][symbol], , ); \
+			rc_bit(ld.mid[pos_state][symbol], , ); \
 			target = symbol - LEN_MID_SYMBOLS \
 					+ MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
 		} else { \
 			rc_update_1(ld.choice2); \
-			rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
-			rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
-			rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
-			rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
-			rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
-			rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
-			rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
-			rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
+			rc_bit(ld.high[symbol], , ); \
+			rc_bit(ld.high[symbol], , ); \
+			rc_bit(ld.high[symbol], , ); \
+			rc_bit(ld.high[symbol], , ); \
+			rc_bit(ld.high[symbol], , ); \
+			rc_bit(ld.high[symbol], , ); \
+			rc_bit(ld.high[symbol], , ); \
+			rc_bit(ld.high[symbol], , ); \
 			target = symbol - LEN_HIGH_SYMBOLS \
 					+ MATCH_LEN_MIN \
 					+ LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
@ -153,8 +98,6 @@ case seq ## _CHOICE2: \
 	} \
 } while (0)

-#endif // HAVE_SMALL
-

 /// Length decoder probabilities; see comments in lzma_common.h.
 typedef struct {
@ -889,7 +832,6 @@ out:
 }


-
 static void
 lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
 		bool allow_eopm)
--- a/src/liblzma/rangecoder/range_decoder.h
+++ b/src/liblzma/rangecoder/range_decoder.h
@ -80,10 +80,22 @@ do { \
 	((range_decoder).code == 0)


-/// Read the next input byte if needed. If more input is needed but there is
+// Read the next input byte if needed.
+#define rc_normalize() \
+do { \
+	if (rc.range < RC_TOP_VALUE) { \
+		rc.range <<= RC_SHIFT_BITS; \
+		rc.code = (rc.code << RC_SHIFT_BITS) | in[rc_in_pos++]; \
+	} \
+} while (0)
+
+
+/// If more input is needed but there is
 /// no more input available, "goto out" is used to jump out of the main
-/// decoder loop.
-#define rc_normalize(seq) \
+/// decoder loop. The "_safe" macros are used in the Resumable decoder
+/// mode in order to save the sequence to continue decoding from that
+/// point later.
+#define rc_normalize_safe(seq) \
 do { \
 	if (rc.range < RC_TOP_VALUE) { \
 		if (unlikely(rc_in_pos == in_size)) { \
@ -99,7 +111,7 @@ do { \
 /// Start decoding a bit. This must be used together with rc_update_0()
 /// and rc_update_1():
 ///
-///     rc_if_0(prob, seq) {
+///     rc_if_0(prob) {
 ///         rc_update_0(prob);
 ///         // Do something
 ///     } else {
@ -107,8 +119,14 @@ do { \
 ///         // Do something else
 ///     }
 ///
-#define rc_if_0(prob, seq) \
-	rc_normalize(seq); \
+#define rc_if_0(prob) \
+	rc_normalize(); \
+	rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \
+	if (rc.code < rc_bound)
+
+
+#define rc_if_0_safe(prob, seq) \
+	rc_normalize_safe(seq); \
 	rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \
 	if (rc.code < rc_bound)

@ -136,9 +154,21 @@ do { \
 /// This macro is used as the last step in bittree reverse decoders since
 /// those don't use "symbol" for anything else than indexing the probability
 /// arrays.
-#define rc_bit_last(prob, action0, action1, seq) \
+#define rc_bit_last(prob, action0, action1) \
 do { \
-	rc_if_0(prob, seq) { \
+	rc_if_0(prob) { \
+		rc_update_0(prob); \
+		action0; \
+	} else { \
+		rc_update_1(prob); \
+		action1; \
+	} \
+} while (0)
+
+
+#define rc_bit_last_safe(prob, action0, action1, seq) \
+do { \
+	rc_if_0_safe(prob, seq) { \
 		rc_update_0(prob); \
 		action0; \
 	} else { \
@ -150,26 +180,33 @@ do { \

 /// Decodes one bit, updates "symbol", and runs action0 or action1 depending
 /// on the decoded bit.
-#define rc_bit(prob, action0, action1, seq) \
+#define rc_bit(prob, action0, action1) \
 	rc_bit_last(prob, \
+		symbol <<= 1; action0, \
+		symbol = (symbol << 1) + 1; action1);
+
+
+#define rc_bit_safe(prob, action0, action1, seq) \
+	rc_bit_last_safe(prob, \
 		symbol <<= 1; action0, \
 		symbol = (symbol << 1) + 1; action1, \
 		seq);

-
-/// Like rc_bit() but add "case seq:" as a prefix. This makes the unrolled
-/// loops more readable because the code isn't littered with "case"
-/// statements. On the other hand this also makes it less readable, since
-/// spotting the places where the decoder loop may be restarted is less
-/// obvious.
-#define rc_bit_case(prob, action0, action1, seq) \
-	case seq: rc_bit(prob, action0, action1, seq)
-
-
 /// Decode a bit without using a probability.
-#define rc_direct(dest, seq) \
+#define rc_direct(dest) \
 do { \
-	rc_normalize(seq); \
+	rc_normalize(); \
+	rc.range >>= 1; \
+	rc.code -= rc.range; \
+	rc_bound = UINT32_C(0) - (rc.code >> 31); \
+	rc.code += rc.range & rc_bound; \
+	dest = (dest << 1) + (rc_bound + 1); \
+} while (0)
+
+
+#define rc_direct_safe(dest, seq) \
+do { \
+	rc_normalize_safe(seq); \
 	rc.range >>= 1; \
 	rc.code -= rc.range; \
 	rc_bound = UINT32_C(0) - (rc.code >> 31); \