xz/src/liblzma/common/block_decoder.c

289 lines
8.3 KiB
C
Raw Normal View History

// SPDX-License-Identifier: 0BSD
2007-12-08 22:42:33 +00:00
///////////////////////////////////////////////////////////////////////////////
//
/// \file block_decoder.c
/// \brief Decodes .xz Blocks
2007-12-08 22:42:33 +00:00
//
// Author: Lasse Collin
2007-12-08 22:42:33 +00:00
//
///////////////////////////////////////////////////////////////////////////////
#include "block_decoder.h"
#include "filter_decoder.h"
2007-12-08 22:42:33 +00:00
#include "check.h"
typedef struct {
2007-12-08 22:42:33 +00:00
enum {
SEQ_CODE,
SEQ_PADDING,
SEQ_CHECK,
2007-12-08 22:42:33 +00:00
} sequence;
/// The filters in the chain; initialized with lzma_raw_decoder_init().
lzma_next_coder next;
/// Decoding options; we also write Compressed Size and Uncompressed
/// Size back to this structure when the decoding has been finished.
lzma_block *block;
2007-12-08 22:42:33 +00:00
/// Compressed Size calculated while decoding
2007-12-08 22:42:33 +00:00
lzma_vli compressed_size;
/// Uncompressed Size calculated while decoding
2007-12-08 22:42:33 +00:00
lzma_vli uncompressed_size;
/// Maximum allowed Compressed Size; this takes into account the
/// size of the Block Header and Check fields when Compressed Size
/// is unknown.
lzma_vli compressed_limit;
2007-12-08 22:42:33 +00:00
/// Maximum allowed Uncompressed Size.
lzma_vli uncompressed_limit;
/// Position when reading the Check field
size_t check_pos;
2007-12-08 22:42:33 +00:00
/// Check of the uncompressed data
lzma_check_state check;
/// True if the integrity check won't be calculated and verified.
bool ignore_check;
} lzma_block_coder;
2007-12-08 22:42:33 +00:00
static inline bool
is_size_valid(lzma_vli size, lzma_vli reference)
{
return reference == LZMA_VLI_UNKNOWN || reference == size;
}
2007-12-08 22:42:33 +00:00
static lzma_ret
block_decode(void *coder_ptr, const lzma_allocator *allocator,
2007-12-08 22:42:33 +00:00
const uint8_t *restrict in, size_t *restrict in_pos,
size_t in_size, uint8_t *restrict out,
size_t *restrict out_pos, size_t out_size, lzma_action action)
{
lzma_block_coder *coder = coder_ptr;
2007-12-08 22:42:33 +00:00
switch (coder->sequence) {
case SEQ_CODE: {
const size_t in_start = *in_pos;
const size_t out_start = *out_pos;
// Limit the amount of input and output space that we give
// to the raw decoder based on the information we have
// (or don't have) from Block Header.
const size_t in_stop = *in_pos + (size_t)my_min(
in_size - *in_pos,
coder->compressed_limit - coder->compressed_size);
const size_t out_stop = *out_pos + (size_t)my_min(
out_size - *out_pos,
coder->uncompressed_limit - coder->uncompressed_size);
const lzma_ret ret = coder->next.code(coder->next.coder,
allocator, in, in_pos, in_stop,
out, out_pos, out_stop, action);
2007-12-08 22:42:33 +00:00
const size_t in_used = *in_pos - in_start;
const size_t out_used = *out_pos - out_start;
// Because we have limited the input and output sizes,
// we know that these cannot grow too big or overflow.
coder->compressed_size += in_used;
coder->uncompressed_size += out_used;
if (ret == LZMA_OK) {
const bool comp_done = coder->compressed_size
== coder->block->compressed_size;
const bool uncomp_done = coder->uncompressed_size
== coder->block->uncompressed_size;
// If both input and output amounts match the sizes
// in Block Header but we still got LZMA_OK instead
// of LZMA_STREAM_END, the file is broken.
if (comp_done && uncomp_done)
return LZMA_DATA_ERROR;
// If the decoder has consumed all the input that it
// needs but it still couldn't fill the output buffer
// or return LZMA_STREAM_END, the file is broken.
if (comp_done && *out_pos < out_size)
return LZMA_DATA_ERROR;
// If the decoder has produced all the output but
// it still didn't return LZMA_STREAM_END or consume
// more input (for example, detecting an end of
// payload marker may need more input but produce
// no output) the file is broken.
if (uncomp_done && *in_pos < in_size)
return LZMA_DATA_ERROR;
}
2007-12-08 22:42:33 +00:00
liblzma: Avoid null pointer + 0 (undefined behavior in C). In the C99 and C17 standards, section 6.5.6 paragraph 8 means that adding 0 to a null pointer is undefined behavior. As of writing, "clang -fsanitize=undefined" (Clang 15) diagnoses this. However, I'm not aware of any compiler that would take advantage of this when optimizing (Clang 15 included). It's good to avoid this anyway since compilers might some day infer that pointer arithmetic implies that the pointer is not NULL. That is, the following foo() would then unconditionally return 0, even for foo(NULL, 0): void bar(char *a, char *b); int foo(char *a, size_t n) { bar(a, a + n); return a == NULL; } In contrast to C, C++ explicitly allows null pointer + 0. So if the above is compiled as C++ then there is no undefined behavior in the foo(NULL, 0) call. To me it seems that changing the C standard would be the sane thing to do (just add one sentence) as it would ensure that a huge amount of old code won't break in the future. Based on web searches it seems that a large number of codebases (where null pointer + 0 occurs) are being fixed instead to be future-proof in case compilers will some day optimize based on it (like making the above foo(NULL, 0) return 0) which in the worst case will cause security bugs. Some projects don't plan to change it. For example, gnulib and thus many GNU tools currently require that null pointer + 0 is defined: https://lists.gnu.org/archive/html/bug-gnulib/2021-11/msg00000.html https://www.gnu.org/software/gnulib/manual/html_node/Other-portability-assumptions.html In XZ Utils null pointer + 0 issue should be fixed after this commit. This adds a few if-statements and thus branches to avoid null pointer + 0. These check for size > 0 instead of ptr != NULL because this way bugs where size > 0 && ptr == NULL will likely get caught quickly. None of them are in hot spots so it shouldn't matter for performance. A little less readable version would be replacing ptr + offset with offset != 0 ? ptr + offset : ptr or creating a macro for it: #define my_ptr_add(ptr, offset) \ ((offset) != 0 ? ((ptr) + (offset)) : (ptr)) Checking for offset != 0 instead of ptr != NULL allows GCC >= 8.1, Clang >= 7, and Clang-based ICX to optimize it to the very same code as ptr + offset. That is, it won't create a branch. So for hot code this could be a good solution to avoid null pointer + 0. Unfortunately other compilers like ICC 2021 or MSVC 19.33 (VS2022) will create a branch from my_ptr_add(). Thanks to Marcin Kowalczyk for reporting the problem: https://github.com/tukaani-project/xz/issues/36
2023-02-21 20:57:10 +00:00
// Don't waste time updating the integrity check if it will be
// ignored. Also skip it if no new output was produced. This
// avoids null pointer + 0 (undefined behavior) when out == 0.
if (!coder->ignore_check && out_used > 0)
lzma_check_update(&coder->check, coder->block->check,
out + out_start, out_used);
2007-12-08 22:42:33 +00:00
if (ret != LZMA_STREAM_END)
return ret;
// Compressed and Uncompressed Sizes are now at their final
// values. Verify that they match the values given to us.
if (!is_size_valid(coder->compressed_size,
coder->block->compressed_size)
|| !is_size_valid(coder->uncompressed_size,
coder->block->uncompressed_size))
return LZMA_DATA_ERROR;
// Copy the values into coder->block. The caller
// may use this information to construct Index.
coder->block->compressed_size = coder->compressed_size;
coder->block->uncompressed_size = coder->uncompressed_size;
coder->sequence = SEQ_PADDING;
2007-12-08 22:42:33 +00:00
}
// Fall through
2007-12-08 22:42:33 +00:00
case SEQ_PADDING:
// Compressed Data is padded to a multiple of four bytes.
while (coder->compressed_size & 3) {
if (*in_pos >= in_size)
return LZMA_OK;
// We use compressed_size here just get the Padding
// right. The actual Compressed Size was stored to
// coder->block already, and won't be modified by
// us anymore.
++coder->compressed_size;
if (in[(*in_pos)++] != 0x00)
2007-12-08 22:42:33 +00:00
return LZMA_DATA_ERROR;
}
if (coder->block->check == LZMA_CHECK_NONE)
return LZMA_STREAM_END;
2007-12-08 22:42:33 +00:00
if (!coder->ignore_check)
lzma_check_finish(&coder->check, coder->block->check);
coder->sequence = SEQ_CHECK;
2007-12-08 22:42:33 +00:00
// Fall through
2007-12-08 22:42:33 +00:00
case SEQ_CHECK: {
const size_t check_size = lzma_check_size(coder->block->check);
lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check,
&coder->check_pos, check_size);
if (coder->check_pos < check_size)
return LZMA_OK;
// Validate the Check only if we support it.
// coder->check.buffer may be uninitialized
// when the Check ID is not supported.
if (!coder->ignore_check
&& lzma_check_is_supported(coder->block->check)
&& memcmp(coder->block->raw_check,
coder->check.buffer.u8,
check_size) != 0)
return LZMA_DATA_ERROR;
2007-12-08 22:42:33 +00:00
return LZMA_STREAM_END;
2007-12-08 22:42:33 +00:00
}
}
2007-12-08 22:42:33 +00:00
return LZMA_PROG_ERROR;
2007-12-08 22:42:33 +00:00
}
static void
block_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
2007-12-08 22:42:33 +00:00
{
lzma_block_coder *coder = coder_ptr;
lzma_next_end(&coder->next, allocator);
2007-12-08 22:42:33 +00:00
lzma_free(coder, allocator);
return;
}
extern lzma_ret
lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
lzma_block *block)
2007-12-08 22:42:33 +00:00
{
lzma_next_coder_init(&lzma_block_decoder_init, next, allocator);
// Validate the options. lzma_block_unpadded_size() does that for us
// except for Uncompressed Size and filters. Filters are validated
// by the raw decoder.
if (lzma_block_unpadded_size(block) == 0
|| !lzma_vli_is_valid(block->uncompressed_size))
return LZMA_PROG_ERROR;
// Allocate *next->coder if needed.
lzma_block_coder *coder = next->coder;
if (coder == NULL) {
coder = lzma_alloc(sizeof(lzma_block_coder), allocator);
if (coder == NULL)
2007-12-08 22:42:33 +00:00
return LZMA_MEM_ERROR;
next->coder = coder;
2007-12-08 22:42:33 +00:00
next->code = &block_decode;
next->end = &block_decoder_end;
coder->next = LZMA_NEXT_CODER_INIT;
2007-12-08 22:42:33 +00:00
}
// Basic initializations
coder->sequence = SEQ_CODE;
coder->block = block;
coder->compressed_size = 0;
coder->uncompressed_size = 0;
// If Compressed Size is not known, we calculate the maximum allowed
// value so that encoded size of the Block (including Block Padding)
// is still a valid VLI and a multiple of four.
coder->compressed_limit
= block->compressed_size == LZMA_VLI_UNKNOWN
? (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
- block->header_size
- lzma_check_size(block->check)
: block->compressed_size;
// With Uncompressed Size this is simpler. If Block Header lacks
// the size info, then LZMA_VLI_MAX is the maximum possible
// Uncompressed Size.
coder->uncompressed_limit
= block->uncompressed_size == LZMA_VLI_UNKNOWN
? LZMA_VLI_MAX
: block->uncompressed_size;
// Initialize the check. It's caller's problem if the Check ID is not
// supported, and the Block decoder cannot verify the Check field.
// Caller can test lzma_check_is_supported(block->check).
coder->check_pos = 0;
lzma_check_init(&coder->check, block->check);
coder->ignore_check = block->version >= 1
? block->ignore_check : false;
// Initialize the filter chain.
return lzma_raw_decoder_init(&coder->next, allocator,
block->filters);
2007-12-08 22:42:33 +00:00
}
extern LZMA_API(lzma_ret)
lzma_block_decoder(lzma_stream *strm, lzma_block *block)
2007-12-08 22:42:33 +00:00
{
lzma_next_strm_init(lzma_block_decoder_init, strm, block);
2007-12-08 22:42:33 +00:00
strm->internal->supported_actions[LZMA_RUN] = true;
strm->internal->supported_actions[LZMA_FINISH] = true;
2007-12-08 22:42:33 +00:00
return LZMA_OK;
}