diff --git a/src/liblzma/subblock/subblock_encoder.c b/src/liblzma/subblock/subblock_encoder.c index d033ea2c..6fc420b5 100644 --- a/src/liblzma/subblock/subblock_encoder.c +++ b/src/liblzma/subblock/subblock_encoder.c @@ -21,17 +21,27 @@ #include "raw_encoder.h" +/// Maximum number of repeats that a single Repeating Data can indicate. +/// This is directly from the file format specification. #define REPEAT_COUNT_MAX (1U << 28) -/// Number of bytes the data chunk being repeated must be before we care -/// about alignment. This is somewhat arbitrary. It just doesn't make sense -/// to waste bytes for alignment when the data chunk is very small. -/// -/// TODO Rename and use this also for Subblock Data? -#define RLE_MIN_SIZE_FOR_ALIGN 3 +/// Number of bytes the data chunk (not including the header part) must be +/// before we care about alignment. This is somewhat arbitrary. It just +/// doesn't make sense to waste bytes for alignment when the data chunk +/// is very small. +#define MIN_CHUNK_SIZE_FOR_ALIGN 4 +/// Number of bytes of the header part of Subblock Type `Data'. This is +/// used as the `skew' argument for subblock_align(). +#define ALIGN_SKEW_DATA 4 + +/// Like above but for Repeating Data. +#define ALIGN_SKEW_REPEATING_DATA 5 + +/// Writes one byte to output buffer and updates the alignment counter. #define write_byte(b) \ do { \ + assert(*out_pos < out_size); \ out[*out_pos] = b; \ ++*out_pos; \ ++coder->alignment.out_pos; \ @@ -77,10 +87,6 @@ struct lzma_coder_s { /// LZMA_SUBBLOCK_ALIGNMENT_DEFAULT if options is NULL. uint32_t multiple; - /// Number of input bytes that we have already read but - /// not yet started writing out. - uint32_t in_pending; - /// Number of input bytes which we have processed and started /// writing out. 32-bit integer is enough since we care only /// about the lowest bits when fixing alignment. @@ -100,6 +106,12 @@ struct lzma_coder_s { /// Allocated size of the buffer. size_t limit; + + /// Number of input bytes that we have already read but + /// not yet started writing out. This can be different + /// to `size' when using Subfilter. That's why we track + /// in_pending separately for RLE (see below). + uint32_t in_pending; } subblock; struct { @@ -112,7 +124,10 @@ struct lzma_coder_s { /// Number of times the first `size' bytes of buffer[] /// will be repeated. - lzma_vli count; + uint64_t count; + + /// Like subblock.in_pending above, but for RLE. + uint32_t in_pending; } rle; struct { @@ -156,6 +171,7 @@ struct lzma_coder_s { } subfilter; + /// Temporary buffer used when we are not the last filter in the chain. struct { size_t pos; size_t size; @@ -170,27 +186,28 @@ struct lzma_coder_s { /// a multiple of coder->alignment.multiple. static bool subblock_align(lzma_coder *coder, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, uint32_t skew) + size_t *restrict out_pos, size_t out_size, + size_t chunk_size, uint32_t skew) { assert(*out_pos < out_size); - const uint32_t target = coder->alignment.in_pos - % coder->alignment.multiple; + // Fix the alignment only if it makes sense at least a little. + if (chunk_size >= MIN_CHUNK_SIZE_FOR_ALIGN) { + const uint32_t target = coder->alignment.in_pos + % coder->alignment.multiple; - while ((coder->alignment.out_pos + skew) - % coder->alignment.multiple != target) { - // Zero indicates padding. - write_byte(0x00); + while ((coder->alignment.out_pos + skew) + % coder->alignment.multiple != target) { + // Zero indicates padding. + write_byte(0x00); - // Check if output buffer got full and indicate it to - // the caller. - if (*out_pos == out_size) - return true; + // Check if output buffer got full and indicate it to + // the caller. + if (*out_pos == out_size) + return true; + } } - coder->alignment.in_pos += coder->alignment.in_pending; - coder->alignment.in_pending = 0; - // Output buffer is not full. return false; } @@ -245,10 +262,20 @@ subblock_rle_flush(lzma_coder *coder) } } - if (coder->rle.count > REPEAT_COUNT_MAX) + if (coder->rle.count == 1) { + // The buffer should be repeated only once. It is + // waste of space to use Repeating Data. Instead, + // write a regular Data Subblock. See SEQ_RLE_COUNT_0 + // in subblock_buffer() for more info. + coder->tmp = coder->rle.size - 1; + } else if (coder->rle.count > REPEAT_COUNT_MAX) { + // There's so much to repeat that it doesn't fit into + // 28-bit integer. We will write two or more Subblocks + // of type Repeating Data. coder->tmp = REPEAT_COUNT_MAX - 1; - else + } else { coder->tmp = coder->rle.count - 1; + } coder->sequence = SEQ_RLE_COUNT_0; @@ -372,7 +399,7 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, assert(coder->subfilter.subcoder.code == NULL); // No Subfilter is enabled, just copy the data as is. - coder->alignment.in_pending += bufcpy( + coder->subblock.in_pending += bufcpy( in, in_pos, in_size, coder->subblock.data, &coder->subblock.size, @@ -415,7 +442,7 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, ? LZMA_FINISH : action); const size_t in_used = *in_pos - in_start; - coder->alignment.in_pending += in_used; + coder->subblock.in_pending += in_used; if (in_used > 0) coder->subfilter.got_input = true; @@ -527,16 +554,21 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, coder->rle.size); // Test if coder->subblock.data is repeating. + // If coder->rle.count would overflow, we + // force flushing. Forced flushing shouldn't + // really happen in real-world situations. const size_t count = coder->subblock.size / coder->rle.size; - if (is_repeating(coder->rle.buffer, - coder->rle.size, - coder->subblock.data, count)) { - if (LZMA_VLI_VALUE_MAX - count - < coder->rle.count) - return LZMA_PROG_ERROR; - + if (UINT64_MAX - count > coder->rle.count + && is_repeating( + coder->rle.buffer, + coder->rle.size, + coder->subblock.data, + count)) { coder->rle.count += count; + coder->rle.in_pending += coder + ->subblock.in_pending; + coder->subblock.in_pending = 0; coder->subblock.size = 0; } else if (coder->rle.count > 0) { @@ -621,17 +653,42 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, break; case SEQ_RLE_COUNT_0: - // Make the Data field properly aligned, but only if the data - // chunk to be repeated isn't extremely small. We have four - // bytes for Count and one byte for Size, thus the number five. - if (coder->rle.size >= RLE_MIN_SIZE_FOR_ALIGN - && subblock_align( - coder, out, out_pos, out_size, 5)) - return LZMA_OK; - assert(coder->rle.count > 0); - write_byte(0x30 | (coder->tmp & 0x0F)); + if (coder->rle.count == 1) { + // The buffer should be repeated only once. Fix + // the alignment and write the first byte of + // Subblock Type `Data'. + if (subblock_align(coder, out, out_pos, out_size, + coder->rle.size, ALIGN_SKEW_DATA)) + return LZMA_OK; + + write_byte(0x20 | (coder->tmp & 0x0F)); + + } else { + // We have something to actually repeat, which should + // mean that it takes less space with run-length + // encoding. + if (subblock_align(coder, out, out_pos, out_size, + coder->rle.size, + ALIGN_SKEW_REPEATING_DATA)) + return LZMA_OK; + + write_byte(0x30 | (coder->tmp & 0x0F)); + } + + // NOTE: If we have to write more than one Repeating Data + // due to rle.count > REPEAT_COUNT_MAX, the subsequent + // Repeating Data Subblocks may get wrong alignment, because + // we add rle.in_pending to alignment.in_pos at once instead + // of adding only as much as this particular Repeating Data + // consumed input data. Correct alignment is always restored + // after all the required Repeating Data Subblocks have been + // written. This problem occurs in such a weird cases that + // it's not worth fixing. + coder->alignment.out_pos += coder->rle.size; + coder->alignment.in_pos += coder->rle.in_pending; + coder->rle.in_pending = 0; coder->sequence = SEQ_RLE_COUNT_1; break; @@ -649,12 +706,18 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, case SEQ_RLE_COUNT_3: write_byte(coder->tmp >> 20); + // Again, see if we are writing regular Data or Repeating Data. + // In the former case, we skip SEQ_RLE_SIZE. + if (coder->rle.count == 1) + coder->sequence = SEQ_RLE_DATA; + else + coder->sequence = SEQ_RLE_SIZE; + if (coder->rle.count > REPEAT_COUNT_MAX) coder->rle.count -= REPEAT_COUNT_MAX; else coder->rle.count = 0; - coder->sequence = SEQ_RLE_SIZE; break; case SEQ_RLE_SIZE: @@ -670,17 +733,20 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, if (coder->pos < coder->rle.size) return LZMA_OK; - coder->alignment.out_pos += coder->rle.size; - coder->pos = 0; coder->sequence = SEQ_FLUSH; break; case SEQ_DATA_SIZE_0: // We need four bytes for the Size field. - if (subblock_align(coder, out, out_pos, out_size, 4)) + if (subblock_align(coder, out, out_pos, out_size, + coder->subblock.size, ALIGN_SKEW_DATA)) return LZMA_OK; + coder->alignment.out_pos += coder->subblock.size; + coder->alignment.in_pos += coder->subblock.in_pending; + coder->subblock.in_pending = 0; + write_byte(0x20 | (coder->tmp & 0x0F)); coder->sequence = SEQ_DATA_SIZE_1; break; @@ -706,7 +772,6 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, if (coder->pos < coder->subblock.size) return LZMA_OK; - coder->alignment.out_pos += coder->subblock.size; coder->subblock.size = 0; coder->pos = 0; coder->sequence = SEQ_FLUSH; @@ -714,7 +779,9 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, case SEQ_SUBFILTER_INIT: { assert(coder->subblock.size == 0); + assert(coder->subblock.in_pending == 0); assert(coder->rle.count == 0); + assert(coder->rle.in_pending == 0); assert(coder->subfilter.mode == SUB_SET); assert(coder->options != NULL); @@ -884,11 +951,12 @@ lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, == LZMA_VLI_VALUE_UNKNOWN; next->coder->pos = 0; - next->coder->alignment.in_pending = 0; next->coder->alignment.in_pos = 0; next->coder->alignment.out_pos = 0; next->coder->subblock.size = 0; + next->coder->subblock.in_pending = 0; next->coder->rle.count = 0; + next->coder->rle.in_pending = 0; next->coder->subfilter.mode = SUB_NONE; next->coder->subfilter.mode_locked = false;