xz/tests/test_microlzma.c

561 lines
16 KiB
C
Raw Normal View History

// SPDX-License-Identifier: 0BSD
2024-02-23 12:58:36 +00:00
///////////////////////////////////////////////////////////////////////////////
//
/// \file test_microlzma.c
/// \brief Tests MicroLZMA encoding and decoding
//
// Author: Jia Tan
//
///////////////////////////////////////////////////////////////////////////////
#include "tests.h"
#define BUFFER_SIZE 1024
2024-02-23 12:58:36 +00:00
#ifdef HAVE_ENCODER_LZMA1
// MicroLZMA encoded "Hello\nWorld\n" output size in bytes.
#define ENCODED_OUTPUT_SIZE 17
// Byte array of "Hello\nWorld\n". This is used for various encoder tests.
static const uint8_t hello_world[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A,
0x57, 0x6F, 0x72, 0x6C, 0x64, 0x0A };
// This is the CRC32 value of the MicroLZMA encoding of "Hello\nWorld\n".
// The settings used were based on LZMA_PRESET_DEFAULT as of liblzma 5.6.0.
// This assumes MicroLZMA is correct in liblzma 5.6.0, which is safe
// considering the encoded "Hello\nWorld\n" can successfully be decoded at
// this time. This is to test for regressions that cause MicroLZMA output
// to change.
static const uint32_t hello_world_encoded_crc = 0x3CDE40A8;
2024-02-23 12:58:36 +00:00
// Function implementation borrowed from lzma_decoder.c. It is needed to
// ensure the first byte of a MicroLZMA stream is set correctly with the
// negation of the LZMA properties.
static bool
lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
{
if (byte > (4 * 5 + 4) * 9 + 8)
return true;
// See the file format specification to understand this.
options->pb = byte / (9 * 5);
byte -= options->pb * 9 * 5;
options->lp = byte / 9;
options->lc = byte - options->lp * 9;
return options->lc + options->lp > LZMA_LCLP_MAX;
}
///////////////////
// Encoder tests //
///////////////////
// This tests a few of the basic options. These options are not unique to
// MicroLZMA in any way, its mostly ensuring that the options are actually
// being checked before initializing the decoder internals.
static void
test_encode_options(void)
{
lzma_stream strm = LZMA_STREAM_INIT;
lzma_options_lzma opt_lzma;
// Initialize with default options.
assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
// NULL stream
assert_lzma_ret(lzma_microlzma_encoder(NULL, &opt_lzma),
LZMA_PROG_ERROR);
// lc/lp/pb = 5/0/2 (lc invalid)
opt_lzma.lc = 5;
opt_lzma.lp = 0;
opt_lzma.pb = 2;
assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
LZMA_OPTIONS_ERROR);
// lc/lp/pb = 0/5/2 (lp invalid)
opt_lzma.lc = 0;
opt_lzma.lp = 5;
opt_lzma.pb = 2;
assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
LZMA_OPTIONS_ERROR);
// lc/lp/pb = 3/2/2 (lc + lp invalid)
opt_lzma.lc = 3;
opt_lzma.lp = 2;
opt_lzma.pb = 2;
assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
LZMA_OPTIONS_ERROR);
// lc/lp/pb = 3/0/5 (pb invalid)
opt_lzma.lc = 3;
opt_lzma.lp = 0;
opt_lzma.pb = 5;
assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
LZMA_OPTIONS_ERROR);
// Zero out lp, pb, lc options to not interfere with later tests.
opt_lzma.lp = 0;
opt_lzma.pb = 0;
opt_lzma.lc = 0;
// Set invalid dictionary size.
opt_lzma.dict_size = LZMA_DICT_SIZE_MIN - 1;
assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
LZMA_OPTIONS_ERROR);
// Maximum dictionary size for the encoder, as described in lzma12.h
// is 1.5 GiB.
opt_lzma.dict_size = (UINT32_C(1) << 30) + (UINT32_C(1) << 29) + 1;
assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
LZMA_OPTIONS_ERROR);
lzma_end(&strm);
}
static void
test_encode_basic(void)
{
lzma_stream strm = LZMA_STREAM_INIT;
lzma_options_lzma opt_lzma;
// The lzma_lzma_preset return value is inverse of what it perhaps
// should be, that is, it returns false on success.
2024-02-23 12:58:36 +00:00
assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
// Initialize the encoder using the default options.
assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);
uint8_t output[BUFFER_SIZE];
strm.next_in = hello_world;
strm.avail_in = sizeof(hello_world);
2024-02-23 12:58:36 +00:00
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
// Everything must be encoded in one lzma_code() call.
assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);
// Check that the entire input was consumed.
assert_uint_eq(strm.total_in, sizeof(hello_world));
2024-02-23 12:58:36 +00:00
// Check that the first byte in the output stream is not 0x00.
// In a regular raw LZMA stream the first byte is always 0x00.
// In MicroLZMA the first byte replaced by the bitwise-negation
2024-02-23 12:58:36 +00:00
// of the LZMA properties.
assert_uint(output[0], !=, 0x00);
2024-02-23 12:58:36 +00:00
const uint8_t props = ~output[0];
2024-02-23 12:58:36 +00:00
lzma_options_lzma test_options;
assert_false(lzma_lzma_lclppb_decode(&test_options, props));
assert_uint_eq(opt_lzma.lc, test_options.lc);
assert_uint_eq(opt_lzma.lp, test_options.lp);
assert_uint_eq(opt_lzma.pb, test_options.pb);
// Compute the check over the output data. This is compared to
// the expected check value.
const uint32_t check_val = lzma_crc32(output, strm.total_out, 0);
2024-02-23 12:58:36 +00:00
assert_uint_eq(check_val, hello_world_encoded_crc);
lzma_end(&strm);
}
// This tests the behavior when strm.avail_out is so small it cannot hold
// the header plus 1 encoded byte (< 6).
static void
test_encode_small_out(void)
{
lzma_stream strm = LZMA_STREAM_INIT;
lzma_options_lzma opt_lzma;
assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);
uint8_t output[BUFFER_SIZE];
strm.next_in = hello_world;
strm.avail_in = sizeof(hello_world);
2024-02-23 12:58:36 +00:00
strm.next_out = output;
strm.avail_out = 5;
// LZMA_PROG_ERROR is expected when strm.avail_out < 6
assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_PROG_ERROR);
// The encoder must be reset because coders cannot be used again
// after returning LZMA_PROG_ERROR.
assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);
// Reset strm.avail_out to be > 6, but not enough to hold all of the
// compressed data.
strm.avail_out = ENCODED_OUTPUT_SIZE - 1;
// Encoding should not return an error now.
assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);
assert_uint(strm.total_in, <, sizeof(hello_world));
2024-02-23 12:58:36 +00:00
lzma_end(&strm);
}
// LZMA_FINISH is the only supported action. All others must
// return LZMA_PROG_ERROR.
static void
test_encode_actions(void)
{
lzma_stream strm = LZMA_STREAM_INIT;
lzma_options_lzma opt_lzma;
assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
const lzma_action actions[] = {
LZMA_RUN,
LZMA_SYNC_FLUSH,
LZMA_FULL_FLUSH,
LZMA_FULL_BARRIER,
};
2024-02-23 12:58:36 +00:00
for (size_t i = 0; i < ARRAY_SIZE(actions); ++i) {
2024-02-23 12:58:36 +00:00
assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
LZMA_OK);
uint8_t output[BUFFER_SIZE];
strm.next_in = hello_world;
strm.avail_in = sizeof(hello_world);
2024-02-23 12:58:36 +00:00
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
assert_lzma_ret(lzma_code(&strm, actions[i]),
LZMA_PROG_ERROR);
}
lzma_end(&strm);
}
#endif // HAVE_ENCODER_LZMA1
2024-02-23 12:58:36 +00:00
///////////////////
// Decoder tests //
///////////////////
#if defined(HAVE_DECODER_LZMA1) && defined(HAVE_ENCODER_LZMA1)
// Byte array of "Goodbye World!". This is used for various decoder tests.
static const uint8_t goodbye_world[] = { 0x47, 0x6F, 0x6F, 0x64, 0x62,
0x79, 0x65, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21 };
2024-02-23 12:58:36 +00:00
static uint8_t *goodbye_world_encoded = NULL;
static size_t goodbye_world_encoded_size = 0;
2024-02-23 12:58:36 +00:00
// Helper function to encode data and return the compressed size.
static size_t
basic_microlzma_encode(const uint8_t *input, size_t in_size,
2024-02-23 12:58:36 +00:00
uint8_t **compressed)
{
lzma_stream strm = LZMA_STREAM_INIT;
lzma_options_lzma opt_lzma;
// Lazy way to set the output size since the input should never
// inflate by much in these simple test cases. This is tested to
// be large enough after encoding to fit the entire input, so if
// this assumption does not hold then this will fail.
const size_t out_size = in_size << 1;
2024-02-23 12:58:36 +00:00
*compressed = tuktest_malloc(out_size);
// Always encode with the default options for simplicity.
if (lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT))
goto decoder_setup_error;
if (lzma_microlzma_encoder(&strm, &opt_lzma) != LZMA_OK)
goto decoder_setup_error;
strm.next_in = input;
strm.avail_in = in_size;
strm.next_out = *compressed;
strm.avail_out = out_size;
if (lzma_code(&strm, LZMA_FINISH) != LZMA_STREAM_END)
goto decoder_setup_error;
// Check that the entire input was consumed and that it fit into
// the output buffer.
2024-02-23 12:58:36 +00:00
if (strm.total_in != in_size)
goto decoder_setup_error;
lzma_end(&strm);
// lzma_end() doesn't touch other members of lzma_stream than
// lzma_stream.internal so using strm.total_out here is fine.
return strm.total_out;
2024-02-23 12:58:36 +00:00
decoder_setup_error:
tuktest_error("Failed to initialize decoder tests");
return 0;
}
static void
test_decode_options(void)
{
// NULL stream
assert_lzma_ret(lzma_microlzma_decoder(NULL, BUFFER_SIZE,
sizeof(hello_world), true,
2024-02-23 12:58:36 +00:00
LZMA_DICT_SIZE_DEFAULT), LZMA_PROG_ERROR);
// Uncompressed size larger than max
lzma_stream strm = LZMA_STREAM_INIT;
assert_lzma_ret(lzma_microlzma_decoder(&strm, BUFFER_SIZE,
LZMA_VLI_MAX + 1, true, LZMA_DICT_SIZE_DEFAULT),
LZMA_OPTIONS_ERROR);
}
// Test that decoding succeeds when uncomp_size is correct regardless of
2024-02-23 12:58:36 +00:00
// the value of uncomp_size_is_exact.
static void
test_decode_uncomp_size_is_exact(void)
{
lzma_stream strm = LZMA_STREAM_INIT;
assert_lzma_ret(lzma_microlzma_decoder(&strm,
goodbye_world_encoded_size,
sizeof(goodbye_world), true,
2024-02-23 12:58:36 +00:00
LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
uint8_t output[BUFFER_SIZE];
strm.next_in = goodbye_world_encoded;
strm.avail_in = goodbye_world_encoded_size;
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END);
assert_uint_eq(strm.total_in, goodbye_world_encoded_size);
assert_uint_eq(strm.total_out, sizeof(goodbye_world));
assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
2024-02-23 12:58:36 +00:00
// Reset decoder with uncomp_size_is_exact set to false and
// uncomp_size set to correct value. Also test using the
// uncompressed size as the dictionary size.
assert_lzma_ret(lzma_microlzma_decoder(&strm,
goodbye_world_encoded_size,
sizeof(goodbye_world), false,
sizeof(goodbye_world)), LZMA_OK);
2024-02-23 12:58:36 +00:00
strm.next_in = goodbye_world_encoded;
strm.avail_in = goodbye_world_encoded_size;
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END);
assert_uint_eq(strm.total_in, goodbye_world_encoded_size);
assert_uint_eq(strm.total_out, sizeof(goodbye_world));
assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
2024-02-23 12:58:36 +00:00
lzma_end(&strm);
}
// This tests decoding when MicroLZMA decoder is called with
// an incorrect uncompressed size.
static void
test_decode_uncomp_size_wrong(void)
{
lzma_stream strm = LZMA_STREAM_INIT;
assert_lzma_ret(lzma_microlzma_decoder(&strm,
goodbye_world_encoded_size,
sizeof(goodbye_world) + 1, false,
2024-02-23 12:58:36 +00:00
LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
uint8_t output[BUFFER_SIZE];
strm.next_in = goodbye_world_encoded;
strm.avail_in = goodbye_world_encoded_size;
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
// LZMA_OK should be returned because the input size given was
// larger than the actual encoded size. The decoder is expecting
// more input to possibly fill the uncompressed size that was set.
assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);
assert_uint_eq(strm.total_out, sizeof(goodbye_world));
2024-02-23 12:58:36 +00:00
assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
2024-02-23 12:58:36 +00:00
// Next, test with uncomp_size_is_exact set.
assert_lzma_ret(lzma_microlzma_decoder(&strm,
goodbye_world_encoded_size,
sizeof(goodbye_world) + 1, true,
2024-02-23 12:58:36 +00:00
LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
strm.next_in = goodbye_world_encoded;
strm.avail_in = goodbye_world_encoded_size;
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
// No error detected, even though all input was consumed and there
// is more room in the output buffer.
assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);
assert_uint_eq(strm.total_out, sizeof(goodbye_world));
assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
2024-02-23 12:58:36 +00:00
// Reset stream with uncomp_size smaller than the real
// uncompressed size.
assert_lzma_ret(lzma_microlzma_decoder(&strm,
goodbye_world_encoded_size,
ARRAY_SIZE(hello_world) - 1, true,
LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
strm.next_in = goodbye_world_encoded;
strm.avail_in = goodbye_world_encoded_size;
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
// This case actually results in an error since it decodes the full
// uncompressed size but the range coder is not in the proper state
// for the stream to end.
assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR);
lzma_end(&strm);
}
static void
test_decode_comp_size_wrong(void)
{
lzma_stream strm = LZMA_STREAM_INIT;
// goodbye_world_encoded_size + 1 is safe because extra space was
// allocated for goodbye_world_encoded. The extra space isn't
// initialized but it shouldn't be read either, thus Valgrind
// has to remain happy with this code.
2024-02-23 12:58:36 +00:00
assert_lzma_ret(lzma_microlzma_decoder(&strm,
goodbye_world_encoded_size + 1,
sizeof(goodbye_world), true,
2024-02-23 12:58:36 +00:00
LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
uint8_t output[BUFFER_SIZE];
strm.next_in = goodbye_world_encoded;
strm.avail_in = goodbye_world_encoded_size;
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
// When uncomp_size_is_exact is set, the compressed size must be
// correct or else LZMA_DATA_ERROR is returned.
assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_DATA_ERROR);
assert_lzma_ret(lzma_microlzma_decoder(&strm,
goodbye_world_encoded_size + 1,
sizeof(goodbye_world), false,
2024-02-23 12:58:36 +00:00
LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
strm.next_in = goodbye_world_encoded;
strm.avail_in = goodbye_world_encoded_size;
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
// When uncomp_size_is_exact is not set, the decoder does not
// detect when the compressed size is wrong as long as all of the
// expected output has been decoded. This is because the decoder
// assumes that the real uncompressed size might be bigger than
// the specified value and in that case more input might be needed
// as well.
2024-02-23 12:58:36 +00:00
assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);
lzma_end(&strm);
}
static void
test_decode_bad_lzma_properties(void)
{
// Alter first byte to encode invalid LZMA properties.
uint8_t *compressed = tuktest_malloc(goodbye_world_encoded_size);
memcpy(compressed, goodbye_world_encoded, goodbye_world_encoded_size);
// lc=3, lp=2, pb=2
compressed[0] = (uint8_t)~0x6FU;
lzma_stream strm = LZMA_STREAM_INIT;
assert_lzma_ret(lzma_microlzma_decoder(&strm,
goodbye_world_encoded_size,
sizeof(goodbye_world), false,
2024-02-23 12:58:36 +00:00
LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
uint8_t output[BUFFER_SIZE];
strm.next_in = compressed;
strm.avail_in = goodbye_world_encoded_size;
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_OPTIONS_ERROR);
// Use valid, but incorrect LZMA properties.
// lc=3, lp=1, pb=2
compressed[0] = (uint8_t)~0x66;
assert_lzma_ret(lzma_microlzma_decoder(&strm,
goodbye_world_encoded_size,
ARRAY_SIZE(goodbye_world), true,
LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
strm.next_in = compressed;
strm.avail_in = goodbye_world_encoded_size;
strm.next_out = output;
strm.avail_out = sizeof(output);
2024-02-23 12:58:36 +00:00
assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR);
lzma_end(&strm);
}
#endif
extern int
main(int argc, char **argv)
{
tuktest_start(argc, argv);
#ifndef HAVE_ENCODER_LZMA1
tuktest_early_skip("LZMA1 encoder disabled");
2024-02-23 12:58:36 +00:00
#else
tuktest_run(test_encode_options);
tuktest_run(test_encode_basic);
tuktest_run(test_encode_small_out);
tuktest_run(test_encode_actions);
// MicroLZMA decoder tests require the basic encoder functionality.
# ifdef HAVE_DECODER_LZMA1
2024-02-23 12:58:36 +00:00
goodbye_world_encoded_size = basic_microlzma_encode(goodbye_world,
sizeof(goodbye_world), &goodbye_world_encoded);
2024-02-23 12:58:36 +00:00
tuktest_run(test_decode_options);
tuktest_run(test_decode_uncomp_size_is_exact);
tuktest_run(test_decode_uncomp_size_wrong);
tuktest_run(test_decode_comp_size_wrong);
tuktest_run(test_decode_bad_lzma_properties);
# endif
return tuktest_end();
#endif
}