1
0
mirror of https://git.tukaani.org/xz.git synced 2025-09-14 14:38:24 +00:00
xz/src/liblzma/subblock/subblock_decoder.c
Lasse Collin 7d17818cec Update the code to mostly match the new simpler file format
specification. Simplify things by removing most of the
support for known uncompressed size in most places.
There are some miscellaneous changes here and there too.

The API of liblzma has got many changes and still some
more will be done soon. While most of the code has been
updated, some things are not fixed (the command line tool
will choke with invalid filter chain, if nothing else).

Subblock filter is somewhat broken for now. It will be
updated once the encoded format of the Subblock filter
has been decided.
2008-06-18 18:02:10 +03:00

637 lines
16 KiB
C

///////////////////////////////////////////////////////////////////////////////
//
/// \file subblock_decoder.c
/// \brief Decoder of the Subblock filter
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#include "subblock_decoder.h"
#include "subblock_decoder_helper.h"
#include "raw_decoder.h"
/// Maximum number of consecutive Subblocks with Subblock Type Padding
#define PADDING_MAX 31
struct lzma_coder_s {
lzma_next_coder next;
enum {
// These require that there is at least one input
// byte available.
SEQ_FLAGS,
SEQ_FILTER_FLAGS,
SEQ_FILTER_END,
SEQ_REPEAT_COUNT_1,
SEQ_REPEAT_COUNT_2,
SEQ_REPEAT_COUNT_3,
SEQ_REPEAT_SIZE,
SEQ_REPEAT_READ_DATA,
SEQ_SIZE_1,
SEQ_SIZE_2,
SEQ_SIZE_3, // This must be right before SEQ_DATA.
// These don't require any input to be available.
SEQ_DATA,
SEQ_REPEAT_FAST,
SEQ_REPEAT_NORMAL,
} sequence;
/// Number of bytes left in the current Subblock Data field.
size_t size;
/// Number of consecutive Subblocks with Subblock Type Padding
uint32_t padding;
/// True when .next.code() has returned LZMA_STREAM_END.
bool next_finished;
/// True when the Subblock decoder has detected End of Payload Marker.
/// This may become true before next_finished becomes true.
bool this_finished;
/// True if Subfilters are allowed.
bool allow_subfilters;
/// Indicates if at least one byte of decoded output has been
/// produced after enabling Subfilter.
bool got_output_with_subfilter;
/// Possible subfilter
lzma_next_coder subfilter;
/// Filter Flags decoder is needed to parse the ID and Properties
/// of the subfilter.
lzma_next_coder filter_flags_decoder;
/// The filter_flags_decoder stores its results here.
lzma_options_filter filter_flags;
/// Options for the Subblock decoder helper. This is used to tell
/// the helper when it should return LZMA_STREAM_END to the subfilter.
lzma_options_subblock_helper helper;
struct {
/// How many times buffer should be repeated
size_t count;
/// Size of the buffer
size_t size;
/// Position in the buffer
size_t pos;
/// Buffer to hold the data to be repeated
uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
} repeat;
/// Temporary buffer needed when the Subblock filter is not the last
/// filter in the chain. The output of the next filter is first
/// decoded into buffer[], which is then used as input for the actual
/// Subblock decoder.
struct {
size_t pos;
size_t size;
uint8_t buffer[LZMA_BUFFER_SIZE];
} temp;
};
/// Values of valid Subblock Flags
enum {
FLAG_PADDING,
FLAG_EOPM,
FLAG_DATA,
FLAG_REPEAT,
FLAG_SET_SUBFILTER,
FLAG_END_SUBFILTER,
};
/// Calls the subfilter and updates coder->uncompressed_size.
static lzma_ret
subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
const uint8_t *in, size_t *in_pos,
size_t in_size, uint8_t *restrict out,
size_t *restrict out_pos, size_t out_size, lzma_action action)
{
assert(coder->subfilter.code != NULL);
// Call the subfilter.
const lzma_ret ret = coder->subfilter.code(
coder->subfilter.coder, allocator,
in, in_pos, in_size, out, out_pos, out_size, action);
return ret;
}
static lzma_ret
decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
const uint8_t *in, size_t *in_pos,
size_t in_size, uint8_t *restrict out,
size_t *restrict out_pos, size_t out_size, lzma_action action)
{
while (*out_pos < out_size && (*in_pos < in_size
|| coder->sequence >= SEQ_DATA))
switch (coder->sequence) {
case SEQ_FLAGS: {
// Do the correct action depending on the Subblock Type.
switch (in[*in_pos] >> 4) {
case FLAG_PADDING:
// Only check that reserved bits are zero.
if (++coder->padding > PADDING_MAX
|| in[*in_pos] & 0x0F)
return LZMA_DATA_ERROR;
++*in_pos;
break;
case FLAG_EOPM:
// There must be no Padding before EOPM.
if (coder->padding != 0)
return LZMA_DATA_ERROR;
// Check that reserved bits are zero.
if (in[*in_pos] & 0x0F)
return LZMA_DATA_ERROR;
// There must be no Subfilter enabled.
if (coder->subfilter.code != NULL)
return LZMA_DATA_ERROR;
++*in_pos;
return LZMA_STREAM_END;
case FLAG_DATA:
// First four bits of the Subblock Data size.
coder->size = in[*in_pos] & 0x0F;
++*in_pos;
coder->got_output_with_subfilter = true;
coder->sequence = SEQ_SIZE_1;
break;
case FLAG_REPEAT:
// First four bits of the Repeat Count. We use
// coder->size as a temporary place for it.
coder->size = in[*in_pos] & 0x0F;
++*in_pos;
coder->got_output_with_subfilter = true;
coder->sequence = SEQ_REPEAT_COUNT_1;
break;
case FLAG_SET_SUBFILTER: {
if (coder->padding != 0 || (in[*in_pos] & 0x0F)
|| coder->subfilter.code != NULL
|| !coder->allow_subfilters)
return LZMA_DATA_ERROR;
assert(coder->filter_flags.options == NULL);
abort();
// return_if_error(lzma_filter_flags_decoder_init(
// &coder->filter_flags_decoder,
// allocator, &coder->filter_flags));
coder->got_output_with_subfilter = false;
++*in_pos;
coder->sequence = SEQ_FILTER_FLAGS;
break;
}
case FLAG_END_SUBFILTER:
if (coder->padding != 0 || (in[*in_pos] & 0x0F)
|| coder->subfilter.code == NULL
|| !coder->got_output_with_subfilter)
return LZMA_DATA_ERROR;
// Tell the helper filter to indicate End of Input
// to our subfilter.
coder->helper.end_was_reached = true;
size_t dummy = 0;
const lzma_ret ret = subfilter_decode(coder, allocator,
NULL, &dummy, 0, out, out_pos,out_size,
action);
// If we didn't reach the end of the subfilter's output
// yet, return to the application. On the next call we
// will get to this same switch-case again, because we
// haven't updated *in_pos yet.
if (ret != LZMA_STREAM_END)
return ret;
// Free Subfilter's memory. This is a bit debatable,
// since we could avoid some malloc()/free() calls
// if the same Subfilter gets used soon again. But
// if Subfilter isn't used again, we could leave
// a memory-hogging filter dangling until someone
// frees Subblock filter itself.
lzma_next_coder_end(&coder->subfilter, allocator);
// Free memory used for subfilter options. This is
// safe, because we don't support any Subfilter that
// would allow pointers in the options structure.
lzma_free(coder->filter_flags.options, allocator);
coder->filter_flags.options = NULL;
++*in_pos;
break;
default:
return LZMA_DATA_ERROR;
}
break;
}
case SEQ_FILTER_FLAGS: {
const lzma_ret ret = coder->filter_flags_decoder.code(
coder->filter_flags_decoder.coder, allocator,
in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
if (ret != LZMA_STREAM_END)
return ret == LZMA_HEADER_ERROR
? LZMA_DATA_ERROR : ret;
// Don't free the filter_flags_decoder. It doesn't take much
// memory and we may need it again.
// Initialize the Subfilter. Subblock and Copy filters are
// not allowed.
if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK)
return LZMA_DATA_ERROR;
coder->helper.end_was_reached = false;
lzma_options_filter filters[3] = {
{
.id = coder->filter_flags.id,
.options = coder->filter_flags.options,
}, {
.id = LZMA_FILTER_SUBBLOCK_HELPER,
.options = &coder->helper,
}, {
.id = LZMA_VLI_VALUE_UNKNOWN,
.options = NULL,
}
};
// Optimization: We know that LZMA uses End of Payload Marker
// (not End of Input), so we can omit the helper filter.
if (filters[0].id == LZMA_FILTER_LZMA)
filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
return_if_error(lzma_raw_decoder_init(
&coder->subfilter, allocator, filters));
coder->sequence = SEQ_FLAGS;
break;
}
case SEQ_FILTER_END:
// We are in the beginning of a Subblock. The next Subblock
// whose type is not Padding, must indicate end of Subfilter.
if (in[*in_pos] == (FLAG_PADDING << 4)) {
++*in_pos;
break;
}
if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
return LZMA_DATA_ERROR;
coder->sequence = SEQ_FLAGS;
break;
case SEQ_REPEAT_COUNT_1:
case SEQ_SIZE_1:
// We use the same code to parse
// - the Size (28 bits) in Subblocks of type Data; and
// - the Repeat count (28 bits) in Subblocks of type
// Repeating Data.
coder->size |= (size_t)(in[*in_pos]) << 4;
++*in_pos;
++coder->sequence;
break;
case SEQ_REPEAT_COUNT_2:
case SEQ_SIZE_2:
coder->size |= (size_t)(in[*in_pos]) << 12;
++*in_pos;
++coder->sequence;
break;
case SEQ_REPEAT_COUNT_3:
case SEQ_SIZE_3:
coder->size |= (size_t)(in[*in_pos]) << 20;
++*in_pos;
// The real value is the stored value plus one.
++coder->size;
// This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
// SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
++coder->sequence;
break;
case SEQ_REPEAT_SIZE:
// Move the Repeat Count to the correct variable and parse
// the Size of the Data to be repeated.
coder->repeat.count = coder->size;
coder->repeat.size = (size_t)(in[*in_pos]) + 1;
coder->repeat.pos = 0;
// The size of the Data field must be bigger than the number
// of Padding bytes before this Subblock.
if (coder->repeat.size <= coder->padding)
return LZMA_DATA_ERROR;
++*in_pos;
coder->padding = 0;
coder->sequence = SEQ_REPEAT_READ_DATA;
break;
case SEQ_REPEAT_READ_DATA: {
// Fill coder->repeat.buffer[].
const size_t in_avail = in_size - *in_pos;
const size_t out_avail
= coder->repeat.size - coder->repeat.pos;
const size_t copy_size = MIN(in_avail, out_avail);
memcpy(coder->repeat.buffer + coder->repeat.pos,
in + *in_pos, copy_size);
*in_pos += copy_size;
coder->repeat.pos += copy_size;
if (coder->repeat.pos == coder->repeat.size) {
coder->repeat.pos = 0;
if (coder->repeat.size == 1
&& coder->subfilter.code == NULL)
coder->sequence = SEQ_REPEAT_FAST;
else
coder->sequence = SEQ_REPEAT_NORMAL;
}
break;
}
case SEQ_DATA: {
// The size of the Data field must be bigger than the number
// of Padding bytes before this Subblock.
assert(coder->size > 0);
if (coder->size <= coder->padding)
return LZMA_DATA_ERROR;
coder->padding = 0;
// Limit the amount of input to match the available
// Subblock Data size.
size_t in_limit;
if (in_size - *in_pos > coder->size)
in_limit = *in_pos + coder->size;
else
in_limit = in_size;
if (coder->subfilter.code == NULL) {
const size_t copy_size = bufcpy(
in, in_pos, in_limit,
out, out_pos, out_size);
coder->size -= copy_size;
} else {
const size_t in_start = *in_pos;
const lzma_ret ret = subfilter_decode(
coder, allocator,
in, in_pos, in_limit,
out, out_pos, out_size,
action);
// Update the number of unprocessed bytes left in
// this Subblock. This assert() is true because
// in_limit prevents *in_pos getting too big.
assert(*in_pos - in_start <= coder->size);
coder->size -= *in_pos - in_start;
if (ret == LZMA_STREAM_END) {
// End of Subfilter can occur only at
// a Subblock boundary.
if (coder->size != 0)
return LZMA_DATA_ERROR;
// We need a Subblock with Unset
// Subfilter before more data.
coder->sequence = SEQ_FILTER_END;
break;
}
if (ret != LZMA_OK)
return ret;
}
// If we couldn't process the whole Subblock Data yet, return.
if (coder->size > 0)
return LZMA_OK;
coder->sequence = SEQ_FLAGS;
break;
}
case SEQ_REPEAT_FAST: {
// Optimization for cases when there is only one byte to
// repeat and no Subfilter.
const size_t out_avail = out_size - *out_pos;
const size_t copy_size = MIN(coder->repeat.count, out_avail);
memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
*out_pos += copy_size;
coder->repeat.count -= copy_size;
if (coder->repeat.count != 0)
return LZMA_OK;
coder->sequence = SEQ_FLAGS;
break;
}
case SEQ_REPEAT_NORMAL:
do {
// Cycle the repeat buffer if needed.
if (coder->repeat.pos == coder->repeat.size) {
if (--coder->repeat.count == 0) {
coder->sequence = SEQ_FLAGS;
break;
}
coder->repeat.pos = 0;
}
if (coder->subfilter.code == NULL) {
bufcpy(coder->repeat.buffer,
&coder->repeat.pos,
coder->repeat.size,
out, out_pos, out_size);
} else {
const lzma_ret ret = subfilter_decode(
coder, allocator,
coder->repeat.buffer,
&coder->repeat.pos,
coder->repeat.size,
out, out_pos, out_size,
action);
if (ret == LZMA_STREAM_END) {
// End of Subfilter can occur only at
// a Subblock boundary.
if (coder->repeat.pos
!= coder->repeat.size
|| --coder->repeat
.count != 0)
return LZMA_DATA_ERROR;
// We need a Subblock with Unset
// Subfilter before more data.
coder->sequence = SEQ_FILTER_END;
break;
} else if (ret != LZMA_OK) {
return ret;
}
}
} while (*out_pos < out_size);
break;
default:
return LZMA_PROG_ERROR;
}
return LZMA_OK;
}
static lzma_ret
subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
const uint8_t *restrict in, size_t *restrict in_pos,
size_t in_size, uint8_t *restrict out,
size_t *restrict out_pos, size_t out_size, lzma_action action)
{
if (coder->next.code == NULL)
return decode_buffer(coder, allocator, in, in_pos, in_size,
out, out_pos, out_size, action);
while (*out_pos < out_size) {
if (!coder->next_finished
&& coder->temp.pos == coder->temp.size) {
coder->temp.pos = 0;
coder->temp.size = 0;
const lzma_ret ret = coder->next.code(
coder->next.coder,
allocator, in, in_pos, in_size,
coder->temp.buffer, &coder->temp.size,
LZMA_BUFFER_SIZE, action);
if (ret == LZMA_STREAM_END)
coder->next_finished = true;
else if (coder->temp.size == 0 || ret != LZMA_OK)
return ret;
}
if (coder->this_finished) {
if (coder->temp.pos != coder->temp.size)
return LZMA_DATA_ERROR;
if (coder->next_finished)
return LZMA_STREAM_END;
return LZMA_OK;
}
const lzma_ret ret = decode_buffer(coder, allocator,
coder->temp.buffer, &coder->temp.pos,
coder->temp.size,
out, out_pos, out_size, action);
if (ret == LZMA_STREAM_END)
// The next coder in the chain hasn't finished
// yet. If the input data is valid, there
// must be no more output coming, but the
// next coder may still need a litle more
// input to detect End of Payload Marker.
coder->this_finished = true;
else if (ret != LZMA_OK)
return ret;
else if (coder->next_finished && *out_pos < out_size)
return LZMA_DATA_ERROR;
}
return LZMA_OK;
}
static void
subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
lzma_next_coder_end(&coder->next, allocator);
lzma_next_coder_end(&coder->subfilter, allocator);
lzma_next_coder_end(&coder->filter_flags_decoder, allocator);
lzma_free(coder->filter_flags.options, allocator);
lzma_free(coder, allocator);
return;
}
extern lzma_ret
lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
const lzma_filter_info *filters)
{
if (next->coder == NULL) {
next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
if (next->coder == NULL)
return LZMA_MEM_ERROR;
next->code = &subblock_decode;
next->end = &subblock_decoder_end;
next->coder->next = LZMA_NEXT_CODER_INIT;
next->coder->subfilter = LZMA_NEXT_CODER_INIT;
next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
} else {
lzma_next_coder_end(&next->coder->subfilter, allocator);
lzma_free(next->coder->filter_flags.options, allocator);
}
next->coder->filter_flags.options = NULL;
next->coder->sequence = SEQ_FLAGS;
next->coder->padding = 0;
next->coder->next_finished = false;
next->coder->this_finished = false;
next->coder->temp.pos = 0;
next->coder->temp.size = 0;
if (filters[0].options != NULL)
next->coder->allow_subfilters = ((lzma_options_subblock *)(
filters[0].options))->allow_subfilters;
else
next->coder->allow_subfilters = false;
return lzma_next_filter_init(
&next->coder->next, allocator, filters + 1);
}