mirror of
https://git.tukaani.org/xz.git
synced 2025-04-03 06:10:57 +00:00
lzma_metadata.header_metadata_size was not properly set to zero if the Metadata had only the Metadata Flags field.
577 lines
14 KiB
C
577 lines
14 KiB
C
///////////////////////////////////////////////////////////////////////////////
//
/// \file metadata_decoder.c
/// \brief Decodes metadata stored in Metadata Blocks
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "metadata_decoder.h"
|
|
#include "block_decoder.h"
|
|
|
|
|
|
/// Largest Extra Record Data size that the decoder agrees to store in
/// memory. The decoded size arrives as lzma_vli and is converted to size_t
/// (plus one byte for a trailing '\0') before allocating, so the limit
/// mainly guarantees that this conversion cannot overflow.
/// NOTE(review): SIZE_MAX / 4 is very generous; a smaller cap may be wiser.
#define EXTRA_SIZE_MAX (SIZE_MAX / 4)
|
|
|
|
|
|
struct lzma_coder_s {
|
|
enum {
|
|
SEQ_FLAGS,
|
|
SEQ_HEADER_METADATA_SIZE,
|
|
SEQ_TOTAL_SIZE,
|
|
SEQ_UNCOMPRESSED_SIZE,
|
|
SEQ_INDEX_COUNT,
|
|
SEQ_INDEX_ALLOC,
|
|
SEQ_INDEX_TOTAL_SIZE,
|
|
SEQ_INDEX_UNCOMPRESSED_SIZE,
|
|
SEQ_EXTRA_PREPARE,
|
|
SEQ_EXTRA_ALLOC,
|
|
SEQ_EXTRA_ID,
|
|
SEQ_EXTRA_SIZE,
|
|
SEQ_EXTRA_DATA_ALLOC,
|
|
SEQ_EXTRA_DATA_COPY,
|
|
SEQ_EXTRA_DUMMY_ALLOC,
|
|
SEQ_EXTRA_DUMMY_ID,
|
|
SEQ_EXTRA_DUMMY_SIZE,
|
|
SEQ_EXTRA_DUMMY_COPY,
|
|
} sequence;
|
|
|
|
/// Number of "things" left to be parsed. If we hit end of input
|
|
/// when this isn't zero, we have corrupt Metadata Block.
|
|
size_t todo_count;
|
|
|
|
/// Position in variable-length integers
|
|
size_t pos;
|
|
|
|
/// Temporary variable needed to decode variables whose type
|
|
/// is size_t instead of lzma_vli.
|
|
lzma_vli tmp;
|
|
|
|
/// Pointer to target structure to hold the parsed results.
|
|
lzma_metadata *metadata;
|
|
|
|
/// The Index Record we currently are parsing
|
|
lzma_index *index_current;
|
|
|
|
/// Number of Records in Index
|
|
size_t index_count;
|
|
|
|
/// Sum of Total Size fields in the Index
|
|
lzma_vli index_total_size;
|
|
|
|
/// Sum of Uncompressed Size fields in the Index
|
|
lzma_vli index_uncompressed_size;
|
|
|
|
/// True if Extra is present.
|
|
bool has_extra;
|
|
|
|
/// True if we have been requested to store the Extra to *metadata.
|
|
bool want_extra;
|
|
|
|
/// Pointer to the end of the Extra Record list.
|
|
lzma_extra *extra_tail;
|
|
|
|
/// Dummy Extra Record used when only verifying integrity of Extra
|
|
/// (not storing it to RAM).
|
|
lzma_extra extra_dummy;
|
|
|
|
/// Block decoder
|
|
lzma_next_coder block_decoder;
|
|
|
|
/// buffer[buffer_pos] is the next byte to process.
|
|
size_t buffer_pos;
|
|
|
|
/// buffer[buffer_size] is the first byte to not process.
|
|
size_t buffer_size;
|
|
|
|
/// Temporary buffer to which encoded Metadata is read before
|
|
/// it is parsed.
|
|
uint8_t buffer[LZMA_BUFFER_SIZE];
|
|
};
|
|
|
|
|
|
/// Decodes one variable-length integer from coder->buffer into `num`.
///
/// `num` must be an lvalue that lzma_vli_decode() can write through; it is
/// parenthesized inside the macro so that any lvalue expression is safe.
/// A variable named `coder` must be in scope at the point of use.
///
/// If lzma_vli_decode() returns anything other than LZMA_STREAM_END, the
/// macro makes the *enclosing function* return that value immediately
/// (presumably LZMA_OK when more input is needed, or an error code --
/// confirm against lzma_vli_decode()). coder->pos keeps the intermediate
/// decoding position in that case, so the same read_vli() can be retried
/// later. On LZMA_STREAM_END, coder->pos is reset for the next integer.
#define read_vli(num) \
do { \
	const lzma_ret read_vli_ret_ = lzma_vli_decode( \
			&(num), &coder->pos, \
			coder->buffer, &coder->buffer_pos, \
			coder->buffer_size); \
	if (read_vli_ret_ != LZMA_STREAM_END) \
		return read_vli_ret_; \
	\
	coder->pos = 0; \
} while (0)
|
|
|
|
|
|
static lzma_ret
|
|
process(lzma_coder *coder, lzma_allocator *allocator)
|
|
{
|
|
while (coder->buffer_pos < coder->buffer_size)
|
|
switch (coder->sequence) {
|
|
case SEQ_FLAGS:
|
|
// Reserved bits must be unset.
|
|
if (coder->buffer[coder->buffer_pos] & 0x70)
|
|
return LZMA_HEADER_ERROR;
|
|
|
|
// If Size of Header Metadata is present, prepare the
|
|
// variable for variable-length integer decoding. Otherwise
|
|
// set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the
|
|
// field isn't present.
|
|
if (coder->buffer[coder->buffer_pos] & 0x01) {
|
|
coder->metadata->header_metadata_size = 0;
|
|
++coder->todo_count;
|
|
}
|
|
|
|
if (coder->buffer[coder->buffer_pos] & 0x02) {
|
|
coder->metadata->total_size = 0;
|
|
++coder->todo_count;
|
|
}
|
|
|
|
if (coder->buffer[coder->buffer_pos] & 0x04) {
|
|
coder->metadata->uncompressed_size = 0;
|
|
++coder->todo_count;
|
|
}
|
|
|
|
if (coder->buffer[coder->buffer_pos] & 0x08) {
|
|
// Setting index_count to 1 is just to indicate that
|
|
// Index is present. The real size is parsed later.
|
|
coder->index_count = 1;
|
|
++coder->todo_count;
|
|
}
|
|
|
|
coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80)
|
|
!= 0;
|
|
|
|
++coder->buffer_pos;
|
|
coder->sequence = SEQ_HEADER_METADATA_SIZE;
|
|
break;
|
|
|
|
case SEQ_HEADER_METADATA_SIZE:
|
|
if (coder->metadata->header_metadata_size
|
|
!= LZMA_VLI_VALUE_UNKNOWN) {
|
|
read_vli(coder->metadata->header_metadata_size);
|
|
|
|
if (coder->metadata->header_metadata_size == 0)
|
|
return LZMA_DATA_ERROR;
|
|
|
|
--coder->todo_count;
|
|
}
|
|
|
|
coder->sequence = SEQ_TOTAL_SIZE;
|
|
break;
|
|
|
|
case SEQ_TOTAL_SIZE:
|
|
if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) {
|
|
read_vli(coder->metadata->total_size);
|
|
|
|
if (coder->metadata->total_size == 0)
|
|
return LZMA_DATA_ERROR;
|
|
|
|
--coder->todo_count;
|
|
}
|
|
|
|
coder->sequence = SEQ_UNCOMPRESSED_SIZE;
|
|
break;
|
|
|
|
case SEQ_UNCOMPRESSED_SIZE:
|
|
if (coder->metadata->uncompressed_size
|
|
!= LZMA_VLI_VALUE_UNKNOWN) {
|
|
read_vli(coder->metadata->uncompressed_size);
|
|
--coder->todo_count;
|
|
}
|
|
|
|
coder->sequence = SEQ_INDEX_COUNT;
|
|
break;
|
|
|
|
case SEQ_INDEX_COUNT:
|
|
if (coder->index_count == 0) {
|
|
coder->sequence = SEQ_EXTRA_PREPARE;
|
|
break;
|
|
}
|
|
|
|
read_vli(coder->tmp);
|
|
|
|
// Index must not be empty nor far too big (wouldn't fit
|
|
// in RAM).
|
|
if (coder->tmp == 0 || coder->tmp
|
|
>= SIZE_MAX / sizeof(lzma_index))
|
|
return LZMA_DATA_ERROR;
|
|
|
|
coder->index_count = (size_t)(coder->tmp);
|
|
coder->tmp = 0;
|
|
|
|
coder->sequence = SEQ_INDEX_ALLOC;
|
|
break;
|
|
|
|
case SEQ_INDEX_ALLOC: {
|
|
lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
|
|
if (i == NULL)
|
|
return LZMA_MEM_ERROR;
|
|
|
|
i->total_size = 0;
|
|
i->uncompressed_size = 0;
|
|
i->next = NULL;
|
|
|
|
if (coder->metadata->index == NULL)
|
|
coder->metadata->index = i;
|
|
else
|
|
coder->index_current->next = i;
|
|
|
|
coder->index_current = i;
|
|
|
|
coder->sequence = SEQ_INDEX_TOTAL_SIZE;
|
|
}
|
|
|
|
// Fall through
|
|
|
|
case SEQ_INDEX_TOTAL_SIZE: {
|
|
read_vli(coder->index_current->total_size);
|
|
|
|
coder->index_total_size += coder->index_current->total_size;
|
|
if (coder->index_total_size > LZMA_VLI_VALUE_MAX)
|
|
return LZMA_DATA_ERROR;
|
|
|
|
// No Block can have Total Size of zero bytes.
|
|
if (coder->index_current->total_size == 0)
|
|
return LZMA_DATA_ERROR;
|
|
|
|
if (--coder->index_count == 0) {
|
|
// If Total Size is present, it must match the sum
|
|
// of Total Sizes in Index.
|
|
if (coder->metadata->total_size
|
|
!= LZMA_VLI_VALUE_UNKNOWN
|
|
&& coder->metadata->total_size
|
|
!= coder->index_total_size)
|
|
return LZMA_DATA_ERROR;
|
|
|
|
coder->index_current = coder->metadata->index;
|
|
coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE;
|
|
} else {
|
|
coder->sequence = SEQ_INDEX_ALLOC;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case SEQ_INDEX_UNCOMPRESSED_SIZE: {
|
|
read_vli(coder->index_current->uncompressed_size);
|
|
|
|
coder->index_uncompressed_size
|
|
+= coder->index_current->uncompressed_size;
|
|
if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX)
|
|
return LZMA_DATA_ERROR;
|
|
|
|
coder->index_current = coder->index_current->next;
|
|
if (coder->index_current == NULL) {
|
|
if (coder->metadata->uncompressed_size
|
|
!= LZMA_VLI_VALUE_UNKNOWN
|
|
&& coder->metadata->uncompressed_size
|
|
!= coder->index_uncompressed_size)
|
|
return LZMA_DATA_ERROR;
|
|
|
|
--coder->todo_count;
|
|
coder->sequence = SEQ_EXTRA_PREPARE;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case SEQ_EXTRA_PREPARE:
|
|
assert(coder->todo_count == 0);
|
|
|
|
// If we get here, we have at least one byte of input left.
|
|
// If "Extra is present" flag is unset in Metadata Flags,
|
|
// it means that there is some garbage and we return an error.
|
|
if (!coder->has_extra)
|
|
return LZMA_DATA_ERROR;
|
|
|
|
if (!coder->want_extra) {
|
|
coder->extra_tail = &coder->extra_dummy;
|
|
coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
|
|
break;
|
|
}
|
|
|
|
coder->sequence = SEQ_EXTRA_ALLOC;
|
|
|
|
// Fall through
|
|
|
|
case SEQ_EXTRA_ALLOC: {
|
|
lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator);
|
|
if (e == NULL)
|
|
return LZMA_MEM_ERROR;
|
|
|
|
e->next = NULL;
|
|
e->id = 0;
|
|
e->size = 0;
|
|
e->data = NULL;
|
|
|
|
if (coder->metadata->extra == NULL)
|
|
coder->metadata->extra = e;
|
|
else
|
|
coder->extra_tail->next = e;
|
|
|
|
coder->extra_tail = e;
|
|
|
|
coder->todo_count = 1;
|
|
coder->sequence = SEQ_EXTRA_ID;
|
|
}
|
|
|
|
// Fall through
|
|
|
|
case SEQ_EXTRA_ID:
|
|
case SEQ_EXTRA_DUMMY_ID:
|
|
read_vli(coder->extra_tail->id);
|
|
|
|
if (coder->extra_tail->id == 0) {
|
|
coder->extra_tail->size = 0;
|
|
coder->extra_tail->data = NULL;
|
|
coder->todo_count = 0;
|
|
--coder->sequence;
|
|
} else {
|
|
++coder->sequence;
|
|
}
|
|
|
|
break;
|
|
|
|
case SEQ_EXTRA_SIZE:
|
|
case SEQ_EXTRA_DUMMY_SIZE:
|
|
read_vli(coder->tmp);
|
|
|
|
if (coder->tmp == 0) {
|
|
// We have no Data in the Extra Record. Don't
|
|
// allocate any memory for it. Go back to
|
|
// SEQ_EXTRA_ALLOC or SEQ_EXTRA_DUMMY_ALLOC.
|
|
coder->tmp = 0;
|
|
coder->sequence -= 2;
|
|
coder->todo_count = 0;
|
|
} else {
|
|
++coder->sequence;
|
|
}
|
|
|
|
break;
|
|
|
|
case SEQ_EXTRA_DATA_ALLOC: {
|
|
if (coder->tmp > EXTRA_SIZE_MAX)
|
|
return LZMA_DATA_ERROR;
|
|
|
|
coder->extra_tail->size = (size_t)(coder->tmp);
|
|
coder->tmp = 0;
|
|
|
|
// We reserve space for the trailing '\0' too.
|
|
uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size) + 1,
|
|
allocator);
|
|
if (d == NULL)
|
|
return LZMA_MEM_ERROR;
|
|
|
|
coder->extra_tail->data = d;
|
|
coder->sequence = SEQ_EXTRA_DATA_COPY;
|
|
}
|
|
|
|
// Fall through
|
|
|
|
case SEQ_EXTRA_DATA_COPY:
|
|
bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size,
|
|
coder->extra_tail->data, &coder->pos,
|
|
(size_t)(coder->extra_tail->size));
|
|
|
|
if ((size_t)(coder->extra_tail->size) == coder->pos) {
|
|
coder->extra_tail->data[coder->pos] = '\0';
|
|
coder->pos = 0;
|
|
coder->todo_count = 0;
|
|
coder->sequence = SEQ_EXTRA_ALLOC;
|
|
}
|
|
|
|
break;
|
|
|
|
case SEQ_EXTRA_DUMMY_ALLOC:
|
|
// Not really alloc, just initialize the dummy entry.
|
|
coder->extra_dummy = (lzma_extra){
|
|
.next = NULL,
|
|
.id = 0,
|
|
.size = 0,
|
|
.data = NULL,
|
|
};
|
|
|
|
coder->todo_count = 1;
|
|
coder->sequence = SEQ_EXTRA_DUMMY_ID;
|
|
break;
|
|
|
|
case SEQ_EXTRA_DUMMY_COPY: {
|
|
// Simply skip as many bytes as indicated by Extra Record Size.
|
|
// We don't check lzma_extra_size_max because we don't
|
|
// allocate any memory to hold the data.
|
|
const size_t in_avail = coder->buffer_size - coder->buffer_pos;
|
|
const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp);
|
|
coder->buffer_pos += skip;
|
|
coder->tmp -= skip;
|
|
|
|
if (coder->tmp == 0) {
|
|
coder->todo_count = 0;
|
|
coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
default:
|
|
return LZMA_PROG_ERROR;
|
|
}
|
|
|
|
return LZMA_OK;
|
|
}
|
|
|
|
|
|
static lzma_ret
|
|
metadata_decode(lzma_coder *coder, lzma_allocator *allocator,
|
|
const uint8_t *restrict in, size_t *restrict in_pos,
|
|
size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
|
|
size_t *restrict out_pos lzma_attribute((unused)),
|
|
size_t out_size lzma_attribute((unused)),
|
|
lzma_action action lzma_attribute((unused)))
|
|
{
|
|
bool end_was_reached = false;
|
|
|
|
while (true) {
|
|
// Fill the buffer if it is empty.
|
|
if (coder->buffer_pos == coder->buffer_size) {
|
|
coder->buffer_pos = 0;
|
|
coder->buffer_size = 0;
|
|
|
|
const lzma_ret ret = coder->block_decoder.code(
|
|
coder->block_decoder.coder, allocator,
|
|
in, in_pos, in_size, coder->buffer,
|
|
&coder->buffer_size, LZMA_BUFFER_SIZE,
|
|
LZMA_RUN);
|
|
|
|
switch (ret) {
|
|
case LZMA_OK:
|
|
// Return immediatelly if we got no new data.
|
|
if (coder->buffer_size == 0)
|
|
return LZMA_OK;
|
|
|
|
break;
|
|
|
|
case LZMA_STREAM_END:
|
|
end_was_reached = true;
|
|
break;
|
|
|
|
default:
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
// Process coder->buffer.
|
|
const lzma_ret ret = process(coder, allocator);
|
|
if (ret != LZMA_OK)
|
|
return ret;
|
|
|
|
// On success, process() eats all the input.
|
|
assert(coder->buffer_pos == coder->buffer_size);
|
|
|
|
if (end_was_reached) {
|
|
// Check that the sequence is not in the
|
|
// middle of anything.
|
|
if (coder->todo_count != 0)
|
|
return LZMA_DATA_ERROR;
|
|
|
|
// If Size of Header Metadata Block was not
|
|
// present, we use zero as its size instead
|
|
// of LZMA_VLI_VALUE_UNKNOWN.
|
|
if (coder->metadata->header_metadata_size
|
|
== LZMA_VLI_VALUE_UNKNOWN)
|
|
coder->metadata->header_metadata_size = 0;
|
|
|
|
return LZMA_STREAM_END;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
|
|
{
|
|
lzma_next_coder_end(&coder->block_decoder, allocator);
|
|
lzma_free(coder, allocator);
|
|
return;
|
|
}
|
|
|
|
|
|
static lzma_ret
|
|
metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
|
lzma_options_block *options, lzma_metadata *metadata,
|
|
bool want_extra)
|
|
{
|
|
if (options == NULL || metadata == NULL)
|
|
return LZMA_PROG_ERROR;
|
|
|
|
if (next->coder == NULL) {
|
|
next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
|
|
if (next->coder == NULL)
|
|
return LZMA_MEM_ERROR;
|
|
|
|
next->code = &metadata_decode;
|
|
next->end = &metadata_decoder_end;
|
|
next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
|
|
}
|
|
|
|
metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
|
|
metadata->total_size = LZMA_VLI_VALUE_UNKNOWN;
|
|
metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
|
|
metadata->index = NULL;
|
|
metadata->extra = NULL;
|
|
|
|
next->coder->sequence = SEQ_FLAGS;
|
|
next->coder->todo_count = 0;
|
|
next->coder->pos = 0;
|
|
next->coder->tmp = 0;
|
|
next->coder->metadata = metadata;
|
|
next->coder->index_current = NULL;
|
|
next->coder->index_count = 0;
|
|
next->coder->index_total_size = 0;
|
|
next->coder->index_uncompressed_size = 0;
|
|
next->coder->want_extra = want_extra;
|
|
next->coder->extra_tail = NULL;
|
|
next->coder->buffer_pos = 0;
|
|
next->coder->buffer_size = 0;
|
|
|
|
return lzma_block_decoder_init(
|
|
&next->coder->block_decoder, allocator, options);
|
|
}
|
|
|
|
|
|
extern lzma_ret
|
|
lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
|
lzma_options_block *options, lzma_metadata *metadata,
|
|
bool want_extra)
|
|
{
|
|
lzma_next_coder_init(metadata_decoder_init, next, allocator,
|
|
options, metadata, want_extra);
|
|
}
|
|
|
|
|
|
extern LZMA_API lzma_ret
|
|
lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options,
|
|
lzma_metadata *metadata, lzma_bool want_extra)
|
|
{
|
|
lzma_next_strm_init(strm, lzma_metadata_decoder_init,
|
|
options, metadata, want_extra);
|
|
|
|
strm->internal->supported_actions[LZMA_RUN] = true;
|
|
|
|
return LZMA_OK;
|
|
}
|