mirror of
https://git.tukaani.org/xz.git
synced 2025-04-01 13:20:59 +00:00
It turns out that this is needed for .lzma files, because the spec in the LZMA SDK says that the end marker may be present even if the size is stored in the header. Such files are rare but they do exist in the real world. The code in liblzma is so old that the spec didn't exist in the LZMA SDK back then, and I had understood that such files weren't possible (the lzma tool in the LZMA SDK didn't create such files). This modifies the internal API so that the LZMA decoder can be told whether the end-of-payload marker (EOPM) is allowed even when the uncompressed size is known. It's allowed with .lzma and not with other uses. Thanks to Karl Beldan for reporting the problem.
220 lines
6.1 KiB
C
220 lines
6.1 KiB
C
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
/// \file microlzma_decoder.c
|
|
/// \brief Decode MicroLZMA format
|
|
//
|
|
// Author: Lasse Collin
|
|
//
|
|
// This file has been put into the public domain.
|
|
// You can do whatever you want with this file.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "lzma_decoder.h"
|
|
#include "lz_decoder.h"
|
|
|
|
|
|
typedef struct {
|
|
/// LZMA1 decoder
|
|
lzma_next_coder lzma;
|
|
|
|
/// Compressed size of the stream as given by the application.
|
|
/// This must be exactly correct.
|
|
///
|
|
/// This will be decremented when input is read.
|
|
uint64_t comp_size;
|
|
|
|
/// Uncompressed size of the stream as given by the application.
|
|
/// This may be less than the actual uncompressed size if
|
|
/// uncomp_size_is_exact is false.
|
|
///
|
|
/// This will be decremented when output is produced.
|
|
lzma_vli uncomp_size;
|
|
|
|
/// LZMA dictionary size as given by the application
|
|
uint32_t dict_size;
|
|
|
|
/// If true, the exact uncompressed size is known. If false,
|
|
/// uncomp_size may be smaller than the real uncompressed size;
|
|
/// uncomp_size may never be bigger than the real uncompressed size.
|
|
bool uncomp_size_is_exact;
|
|
|
|
/// True once the first byte of the MicroLZMA stream
|
|
/// has been processed.
|
|
bool props_decoded;
|
|
} lzma_microlzma_coder;
|
|
|
|
|
|
static lzma_ret
|
|
microlzma_decode(void *coder_ptr, const lzma_allocator *allocator,
|
|
const uint8_t *restrict in, size_t *restrict in_pos,
|
|
size_t in_size, uint8_t *restrict out,
|
|
size_t *restrict out_pos, size_t out_size, lzma_action action)
|
|
{
|
|
lzma_microlzma_coder *coder = coder_ptr;
|
|
|
|
// Remember the in start position so that we can update comp_size.
|
|
const size_t in_start = *in_pos;
|
|
|
|
// Remember the out start position so that we can update uncomp_size.
|
|
const size_t out_start = *out_pos;
|
|
|
|
// Limit the amount of input so that the decoder won't read more than
|
|
// comp_size. This is required when uncomp_size isn't exact because
|
|
// in that case the LZMA decoder will try to decode more input even
|
|
// when it has no output space (it can be looking for EOPM).
|
|
if (in_size - *in_pos > coder->comp_size)
|
|
in_size = *in_pos + (size_t)(coder->comp_size);
|
|
|
|
// When the exact uncompressed size isn't known, we must limit
|
|
// the available output space to prevent the LZMA decoder from
|
|
// trying to decode too much.
|
|
if (!coder->uncomp_size_is_exact
|
|
&& out_size - *out_pos > coder->uncomp_size)
|
|
out_size = *out_pos + (size_t)(coder->uncomp_size);
|
|
|
|
if (!coder->props_decoded) {
|
|
// There must be at least one byte of input to decode
|
|
// the properties byte.
|
|
if (*in_pos >= in_size)
|
|
return LZMA_OK;
|
|
|
|
lzma_options_lzma options = {
|
|
.preset_dict = NULL,
|
|
.preset_dict_size = 0,
|
|
};
|
|
|
|
// The properties are stored as bitwise-negation
|
|
// of the typical encoding.
|
|
if (lzma_lzma_lclppb_decode(&options, ~in[*in_pos]))
|
|
return LZMA_OPTIONS_ERROR;
|
|
|
|
++*in_pos;
|
|
|
|
// Initialize the decoder.
|
|
options.dict_size = coder->dict_size;
|
|
lzma_filter_info filters[2] = {
|
|
{
|
|
.init = &lzma_lzma_decoder_init,
|
|
.options = &options,
|
|
}, {
|
|
.init = NULL,
|
|
}
|
|
};
|
|
|
|
return_if_error(lzma_next_filter_init(&coder->lzma,
|
|
allocator, filters));
|
|
|
|
// Use a hack to set the uncompressed size.
|
|
if (coder->uncomp_size_is_exact)
|
|
lzma_lz_decoder_uncompressed(coder->lzma.coder,
|
|
coder->uncomp_size, false);
|
|
|
|
// Pass one dummy 0x00 byte to the LZMA decoder since that
|
|
// is what it expects the first byte to be.
|
|
const uint8_t dummy_in = 0;
|
|
size_t dummy_in_pos = 0;
|
|
if (coder->lzma.code(coder->lzma.coder, allocator,
|
|
&dummy_in, &dummy_in_pos, 1,
|
|
out, out_pos, out_size, LZMA_RUN) != LZMA_OK)
|
|
return LZMA_PROG_ERROR;
|
|
|
|
assert(dummy_in_pos == 1);
|
|
coder->props_decoded = true;
|
|
}
|
|
|
|
// The rest is normal LZMA decoding.
|
|
lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
|
|
in, in_pos, in_size,
|
|
out, out_pos, out_size, action);
|
|
|
|
// Update the remaining compressed size.
|
|
assert(coder->comp_size >= *in_pos - in_start);
|
|
coder->comp_size -= *in_pos - in_start;
|
|
|
|
if (coder->uncomp_size_is_exact) {
|
|
// After successful decompression of the complete stream
|
|
// the compressed size must match.
|
|
if (ret == LZMA_STREAM_END && coder->comp_size != 0)
|
|
ret = LZMA_DATA_ERROR;
|
|
} else {
|
|
// Update the amount of output remaining.
|
|
assert(coder->uncomp_size >= *out_pos - out_start);
|
|
coder->uncomp_size -= *out_pos - out_start;
|
|
|
|
// - We must not get LZMA_STREAM_END because the stream
|
|
// shouldn't have EOPM.
|
|
// - We must use uncomp_size to determine when to
|
|
// return LZMA_STREAM_END.
|
|
if (ret == LZMA_STREAM_END)
|
|
ret = LZMA_DATA_ERROR;
|
|
else if (coder->uncomp_size == 0)
|
|
ret = LZMA_STREAM_END;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
static void
|
|
microlzma_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
|
|
{
|
|
lzma_microlzma_coder *coder = coder_ptr;
|
|
lzma_next_end(&coder->lzma, allocator);
|
|
lzma_free(coder, allocator);
|
|
return;
|
|
}
|
|
|
|
|
|
static lzma_ret
|
|
microlzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
|
|
uint64_t comp_size,
|
|
uint64_t uncomp_size, bool uncomp_size_is_exact,
|
|
uint32_t dict_size)
|
|
{
|
|
lzma_next_coder_init(µlzma_decoder_init, next, allocator);
|
|
|
|
lzma_microlzma_coder *coder = next->coder;
|
|
|
|
if (coder == NULL) {
|
|
coder = lzma_alloc(sizeof(lzma_microlzma_coder), allocator);
|
|
if (coder == NULL)
|
|
return LZMA_MEM_ERROR;
|
|
|
|
next->coder = coder;
|
|
next->code = µlzma_decode;
|
|
next->end = µlzma_decoder_end;
|
|
|
|
coder->lzma = LZMA_NEXT_CODER_INIT;
|
|
}
|
|
|
|
// The public API is uint64_t but the internal LZ decoder API uses
|
|
// lzma_vli.
|
|
if (uncomp_size > LZMA_VLI_MAX)
|
|
return LZMA_OPTIONS_ERROR;
|
|
|
|
coder->comp_size = comp_size;
|
|
coder->uncomp_size = uncomp_size;
|
|
coder->uncomp_size_is_exact = uncomp_size_is_exact;
|
|
coder->dict_size = dict_size;
|
|
|
|
coder->props_decoded = false;
|
|
|
|
return LZMA_OK;
|
|
}
|
|
|
|
|
|
extern LZMA_API(lzma_ret)
|
|
lzma_microlzma_decoder(lzma_stream *strm, uint64_t comp_size,
|
|
uint64_t uncomp_size, lzma_bool uncomp_size_is_exact,
|
|
uint32_t dict_size)
|
|
{
|
|
lzma_next_strm_init(microlzma_decoder_init, strm, comp_size,
|
|
uncomp_size, uncomp_size_is_exact, dict_size);
|
|
|
|
strm->internal->supported_actions[LZMA_RUN] = true;
|
|
strm->internal->supported_actions[LZMA_FINISH] = true;
|
|
|
|
return LZMA_OK;
|
|
}
|