diff --git a/src/liblzma/api/lzma/container.h b/src/liblzma/api/lzma/container.h index 581f3507..4ad7ce6a 100644 --- a/src/liblzma/api/lzma/container.h +++ b/src/liblzma/api/lzma/container.h @@ -690,11 +690,22 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_decode( * The special behavior of lzma_code() applies to lzma_erofs_encoder() only. * * \param strm Pointer to properly prepared lzma_stream + * \param comp_size Compressed size of the EROFS LZMA stream. + * The caller must somehow know this exactly. * \param uncomp_size Uncompressed size of the EROFS LZMA stream. - * The caller must somehow know this. Note that - * while the EROFS LZMA decoder in XZ Embedded needs - * also the compressed size, the implementation in - * liblzma doesn't need to know the compressed size. + * If the exact uncompressed size isn't known, this + * can be set to a value that is at most as big as + * the exact uncompressed size would be, but then the + * next argument uncomp_size_is_exact must be false. + * \param uncomp_size_is_exact + * If true, uncomp_size must be exactly correct. + * This will improve error detection at the end of + * the stream. If the exact uncompressed size isn't + * known, this must be false. uncomp_size must still + * be at most as big as the exact uncompressed size + * is. Setting this to false when the exact size is + * known will work but error detection at the end of + * the stream will be weaker. * \param dict_size LZMA dictionary size that was used when * compressing the data. It is OK to use a bigger * value too but liblzma will then allocate more @@ -705,4 +716,6 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_decode( * dictionary than actually required.) */ extern LZMA_API(lzma_ret) lzma_erofs_decoder( - lzma_stream *strm, uint64_t uncomp_size, uint32_t dict_size); + lzma_stream *strm, uint64_t comp_size, + uint64_t uncomp_size, lzma_bool uncomp_size_is_exact, + uint32_t dict_size); diff --git a/src/liblzma/common/erofs_decoder.c b/src/liblzma/common/erofs_decoder.c index ef584373..816e2482 100644 --- a/src/liblzma/common/erofs_decoder.c +++ b/src/liblzma/common/erofs_decoder.c @@ -18,12 +18,27 @@ typedef struct { /// LZMA1 decoder lzma_next_coder lzma; - /// Uncompressed size of the stream as given by the application + /// Compressed size of the stream as given by the application. + /// This must be exactly correct. + /// + /// This will be decremented when input is read. + uint64_t comp_size; + + /// Uncompressed size of the stream as given by the application. + /// This may be less than the actual uncompressed size if + /// uncomp_size_is_exact is false. + /// + /// This will be decremented when output is produced. lzma_vli uncomp_size; /// LZMA dictionary size as given by the application uint32_t dict_size; + /// If true, the exact uncompressed size is known. If false, + /// uncomp_size may be smaller than the real uncompressed size; + /// uncomp_size may never be bigger than the real uncompressed size. + bool uncomp_size_is_exact; + /// True once the first byte of the EROFS LZMA stream /// has been processed. bool props_decoded; @@ -38,6 +53,26 @@ erofs_decode(void *coder_ptr, const lzma_allocator *allocator, { lzma_erofs_coder *coder = coder_ptr; + // Remember the in start position so that we can update comp_size. + const size_t in_start = *in_pos; + + // Remember the out start position so that we can update uncomp_size. + const size_t out_start = *out_pos; + + // Limit the amount of input so that the decoder won't read more than + // comp_size. This is required when uncomp_size isn't exact because + // in that case the LZMA decoder will try to decode more input even + // when it has no output space (it can be looking for EOPM). + if (in_size - *in_pos > coder->comp_size) + in_size = *in_pos + (size_t)(coder->comp_size); + + // When the exact uncompressed size isn't known, we must limit + // the available output space to prevent the LZMA decoder from + // trying to decode too much. + if (!coder->uncomp_size_is_exact + && out_size - *out_pos > coder->uncomp_size) + out_size = *out_pos + (size_t)(coder->uncomp_size); + if (!coder->props_decoded) { // There must be at least one byte of input to decode // the properties byte. @@ -71,8 +106,9 @@ erofs_decode(void *coder_ptr, const lzma_allocator *allocator, allocator, filters)); // Use a hack to set the uncompressed size. - lzma_lz_decoder_uncompressed(coder->lzma.coder, - coder->uncomp_size); + if (coder->uncomp_size_is_exact) + lzma_lz_decoder_uncompressed(coder->lzma.coder, + coder->uncomp_size); // Pass one dummy 0x00 byte to the LZMA decoder since that // is what it expects the first byte to be. @@ -88,9 +124,30 @@ erofs_decode(void *coder_ptr, const lzma_allocator *allocator, } // The rest is normal LZMA decoding. - return coder->lzma.code(coder->lzma.coder, allocator, + lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); + + // Update the remaining compressed size. + assert(coder->comp_size >= *in_pos - in_start); + coder->comp_size -= *in_pos - in_start; + + if (!coder->uncomp_size_is_exact) { + // Update the amount of output remaining. + assert(coder->uncomp_size >= *out_pos - out_start); + coder->uncomp_size -= *out_pos - out_start; + + // - We must not get LZMA_STREAM_END because the stream + // shouldn't have EOPM. + // - We must use uncomp_size to determine when to + // return LZMA_STREAM_END. + if (ret == LZMA_STREAM_END) + ret = LZMA_DATA_ERROR; + else if (coder->uncomp_size == 0) + ret = LZMA_STREAM_END; + } + + return ret; } @@ -106,7 +163,9 @@ erofs_decoder_end(void *coder_ptr, const lzma_allocator *allocator) static lzma_ret erofs_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, - uint64_t uncomp_size, uint32_t dict_size) + uint64_t comp_size, + uint64_t uncomp_size, bool uncomp_size_is_exact, + uint32_t dict_size) { lzma_next_coder_init(&erofs_decoder_init, next, allocator); @@ -124,10 +183,14 @@ erofs_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, coder->lzma = LZMA_NEXT_CODER_INIT; } + // The public API is uint64_t but the internal LZ decoder API uses + // lzma_vli. if (uncomp_size > LZMA_VLI_MAX) return LZMA_OPTIONS_ERROR; + coder->comp_size = comp_size; coder->uncomp_size = uncomp_size; + coder->uncomp_size_is_exact = uncomp_size_is_exact; coder->dict_size = dict_size; coder->props_decoded = false; @@ -137,9 +200,12 @@ erofs_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, extern LZMA_API(lzma_ret) -lzma_erofs_decoder(lzma_stream *strm, uint64_t uncomp_size, uint32_t dict_size) +lzma_erofs_decoder(lzma_stream *strm, uint64_t comp_size, + uint64_t uncomp_size, lzma_bool uncomp_size_is_exact, + uint32_t dict_size) { - lzma_next_strm_init(erofs_decoder_init, strm, uncomp_size, dict_size); + lzma_next_strm_init(erofs_decoder_init, strm, comp_size, + uncomp_size, uncomp_size_is_exact, dict_size); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true;