Subblock decoder: Don't exit the main loop in decode_buffer()

too early if we hit End of Input while decoding a Subblock of
type Repeating Data. To keep the loop termination condition
elegant, the order of enumerations in coder->sequence was
changed.

To keep the case-labels in roughly the same order as the
enumerations in coder->sequence, large chunks of code were
moved around. This made the diff big and ugly compared to
the amount of actual changes made.
This commit is contained in:
Lasse Collin 2008-01-17 18:56:53 +02:00
parent b254bd97b1
commit 33be3c0e24
1 changed files with 139 additions and 133 deletions

View File

@ -30,20 +30,24 @@ struct lzma_coder_s {
lzma_next_coder next; lzma_next_coder next;
enum { enum {
// These require that there is at least one input
// byte available.
SEQ_FLAGS, SEQ_FLAGS,
SEQ_SIZE_1, SEQ_FILTER_FLAGS,
SEQ_SIZE_2, SEQ_FILTER_END,
SEQ_SIZE_3,
SEQ_DATA,
SEQ_REPEAT_COUNT_1, SEQ_REPEAT_COUNT_1,
SEQ_REPEAT_COUNT_2, SEQ_REPEAT_COUNT_2,
SEQ_REPEAT_COUNT_3, SEQ_REPEAT_COUNT_3,
SEQ_REPEAT_SIZE, SEQ_REPEAT_SIZE,
SEQ_REPEAT_READ_DATA, SEQ_REPEAT_READ_DATA,
SEQ_SIZE_1,
SEQ_SIZE_2,
SEQ_SIZE_3, // This must be right before SEQ_DATA.
// These don't require any input to be available.
SEQ_DATA,
SEQ_REPEAT_FAST, SEQ_REPEAT_FAST,
SEQ_REPEAT_NORMAL, SEQ_REPEAT_NORMAL,
SEQ_FILTER_FLAGS,
SEQ_FILTER_END,
} sequence; } sequence;
/// Number of bytes left in the current Subblock Data field. /// Number of bytes left in the current Subblock Data field.
@ -167,7 +171,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
size_t *restrict out_pos, size_t out_size, lzma_action action) size_t *restrict out_pos, size_t out_size, lzma_action action)
{ {
while (*out_pos < out_size && (*in_pos < in_size while (*out_pos < out_size && (*in_pos < in_size
|| coder->sequence == SEQ_DATA)) || coder->sequence >= SEQ_DATA))
switch (coder->sequence) { switch (coder->sequence) {
case SEQ_FLAGS: { case SEQ_FLAGS: {
if ((in[*in_pos] >> 4) != FLAG_PADDING) if ((in[*in_pos] >> 4) != FLAG_PADDING)
@ -284,8 +288,68 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
break; break;
} }
case SEQ_SIZE_1: case SEQ_FILTER_FLAGS: {
const lzma_ret ret = coder->filter_flags_decoder.code(
coder->filter_flags_decoder.coder, allocator,
in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
if (ret != LZMA_STREAM_END)
return ret == LZMA_HEADER_ERROR
? LZMA_DATA_ERROR : ret;
// Don't free the filter_flags_decoder. It doesn't take much
// memory and we may need it again.
// Initialize the Subfilter. Subblock and Copy filters are
// not allowed.
if (coder->filter_flags.id == LZMA_FILTER_COPY
|| coder->filter_flags.id
== LZMA_FILTER_SUBBLOCK)
return LZMA_DATA_ERROR;
coder->helper.end_was_reached = false;
lzma_options_filter filters[3] = {
{
.id = coder->filter_flags.id,
.options = coder->filter_flags.options,
}, {
.id = LZMA_FILTER_SUBBLOCK_HELPER,
.options = &coder->helper,
}, {
.id = LZMA_VLI_VALUE_UNKNOWN,
.options = NULL,
}
};
// Optimization: We know that LZMA uses End of Payload Marker
// (not End of Input), so we can omit the helper filter.
if (filters[0].id == LZMA_FILTER_LZMA)
filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
return_if_error(lzma_raw_decoder_init(
&coder->subfilter, allocator,
filters, LZMA_VLI_VALUE_UNKNOWN, false));
coder->sequence = SEQ_FLAGS;
break;
}
case SEQ_FILTER_END:
// We are in the beginning of a Subblock. The next Subblock
// whose type is not Padding, must indicate end of Subfilter.
if (in[*in_pos] == (FLAG_PADDING << 4)) {
++*in_pos;
break;
}
if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
return LZMA_DATA_ERROR;
coder->sequence = SEQ_FLAGS;
break;
case SEQ_REPEAT_COUNT_1: case SEQ_REPEAT_COUNT_1:
case SEQ_SIZE_1:
// We use the same code to parse // We use the same code to parse
// - the Size (28 bits) in Subblocks of type Data; and // - the Size (28 bits) in Subblocks of type Data; and
// - the Repeat count (28 bits) in Subblocks of type // - the Repeat count (28 bits) in Subblocks of type
@ -295,21 +359,23 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
++coder->sequence; ++coder->sequence;
break; break;
case SEQ_SIZE_2:
case SEQ_REPEAT_COUNT_2: case SEQ_REPEAT_COUNT_2:
case SEQ_SIZE_2:
coder->size |= (size_t)(in[*in_pos]) << 12; coder->size |= (size_t)(in[*in_pos]) << 12;
++*in_pos; ++*in_pos;
++coder->sequence; ++coder->sequence;
break; break;
case SEQ_SIZE_3:
case SEQ_REPEAT_COUNT_3: case SEQ_REPEAT_COUNT_3:
case SEQ_SIZE_3:
coder->size |= (size_t)(in[*in_pos]) << 20; coder->size |= (size_t)(in[*in_pos]) << 20;
++*in_pos;
// The real value is the stored value plus one. // The real value is the stored value plus one.
++coder->size; ++coder->size;
++*in_pos; // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
// SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
++coder->sequence; ++coder->sequence;
break; break;
@ -348,6 +414,68 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
break; break;
} }
case SEQ_DATA: {
// Limit the amount of input to match the available
// Subblock Data size.
size_t in_limit;
if (in_size - *in_pos > coder->size)
in_limit = *in_pos + coder->size;
else
in_limit = in_size;
if (coder->subfilter.code == NULL) {
const size_t copy_size = bufcpy(
in, in_pos, in_limit,
out, out_pos, out_size);
coder->size -= copy_size;
if (update_uncompressed_size(coder, copy_size))
return LZMA_DATA_ERROR;
} else {
const size_t in_start = *in_pos;
const lzma_ret ret = subfilter_decode(
coder, allocator,
in, in_pos, in_limit,
out, out_pos, out_size,
action);
// Update the number of unprocessed bytes left in
// this Subblock. This assert() is true because
// in_limit prevents *in_pos getting too big.
assert(*in_pos - in_start <= coder->size);
coder->size -= *in_pos - in_start;
if (ret == LZMA_STREAM_END) {
// End of Subfilter can occur only at
// a Subblock boundary.
if (coder->size != 0)
return LZMA_DATA_ERROR;
// We need a Subblock with Unset
// Subfilter before more data.
coder->sequence = SEQ_FILTER_END;
break;
}
if (ret != LZMA_OK)
return ret;
}
// If we couldn't process the whole Subblock Data yet, return.
if (coder->size > 0)
return LZMA_OK;
// Check if we have decoded all the data.
if (coder->uncompressed_size == 0
&& coder->subfilter.code == NULL)
return LZMA_STREAM_END;
coder->sequence = SEQ_FLAGS;
break;
}
case SEQ_REPEAT_FAST: { case SEQ_REPEAT_FAST: {
// Optimization for cases when there is only one byte to // Optimization for cases when there is only one byte to
// repeat and no Subfilter. // repeat and no Subfilter.
@ -432,128 +560,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
break; break;
case SEQ_DATA: {
// Limit the amount of input to match the available
// Subblock Data size.
size_t in_limit;
if (in_size - *in_pos > coder->size)
in_limit = *in_pos + coder->size;
else
in_limit = in_size;
if (coder->subfilter.code == NULL) {
const size_t copy_size = bufcpy(
in, in_pos, in_limit,
out, out_pos, out_size);
coder->size -= copy_size;
if (update_uncompressed_size(coder, copy_size))
return LZMA_DATA_ERROR;
} else {
const size_t in_start = *in_pos;
const lzma_ret ret = subfilter_decode(
coder, allocator,
in, in_pos, in_limit,
out, out_pos, out_size,
action);
// Update the number of unprocessed bytes left in
// this Subblock. This assert() is true because
// in_limit prevents *in_pos getting too big.
assert(*in_pos - in_start <= coder->size);
coder->size -= *in_pos - in_start;
if (ret == LZMA_STREAM_END) {
// End of Subfilter can occur only at
// a Subblock boundary.
if (coder->size != 0)
return LZMA_DATA_ERROR;
// We need a Subblock with Unset
// Subfilter before more data.
coder->sequence = SEQ_FILTER_END;
break;
}
if (ret != LZMA_OK)
return ret;
}
// If we couldn't process the whole Subblock Data yet, return.
if (coder->size > 0)
return LZMA_OK;
// Check if we have decoded all the data.
if (coder->uncompressed_size == 0
&& coder->subfilter.code == NULL)
return LZMA_STREAM_END;
coder->sequence = SEQ_FLAGS;
break;
}
case SEQ_FILTER_FLAGS: {
const lzma_ret ret = coder->filter_flags_decoder.code(
coder->filter_flags_decoder.coder, allocator,
in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
if (ret != LZMA_STREAM_END)
return ret == LZMA_HEADER_ERROR
? LZMA_DATA_ERROR : ret;
// Don't free the filter_flags_decoder. It doesn't take much
// memory and we may need it again.
// Initialize the Subfilter. Subblock and Copy filters are
// not allowed.
if (coder->filter_flags.id == LZMA_FILTER_COPY
|| coder->filter_flags.id
== LZMA_FILTER_SUBBLOCK)
return LZMA_DATA_ERROR;
coder->helper.end_was_reached = false;
lzma_options_filter filters[3] = {
{
.id = coder->filter_flags.id,
.options = coder->filter_flags.options,
}, {
.id = LZMA_FILTER_SUBBLOCK_HELPER,
.options = &coder->helper,
}, {
.id = LZMA_VLI_VALUE_UNKNOWN,
.options = NULL,
}
};
// Optimization: We know that LZMA uses End of Payload Marker
// (not End of Input), so we can omit the helper filter.
if (filters[0].id == LZMA_FILTER_LZMA)
filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
return_if_error(lzma_raw_decoder_init(
&coder->subfilter, allocator,
filters, LZMA_VLI_VALUE_UNKNOWN, false));
coder->sequence = SEQ_FLAGS;
break;
}
case SEQ_FILTER_END:
// We are in the beginning of a Subblock. The next Subblock
// whose type is not Padding, must indicate end of Subfilter.
if (in[*in_pos] == (FLAG_PADDING << 4)) {
++*in_pos;
break;
}
if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
return LZMA_DATA_ERROR;
coder->sequence = SEQ_FLAGS;
break;
default: default:
return LZMA_PROG_ERROR; return LZMA_PROG_ERROR;
} }