// SPDX-License-Identifier: 0BSD /////////////////////////////////////////////////////////////////////////////// // /// \file test_microlzma.c /// \brief Tests MicroLZMA encoding and decoding // // Author: Jia Tan // /////////////////////////////////////////////////////////////////////////////// #include "tests.h" #define BUFFER_SIZE 1024 #ifdef HAVE_ENCODER_LZMA1 // MicroLZMA encoded "Hello\nWorld\n" output size in bytes. #define ENCODED_OUTPUT_SIZE 17 // Byte array of "Hello\nWorld\n". This is used for various encoder tests. static const uint8_t hello_world[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x0A }; // This is the CRC32 value of the MicroLZMA encoding of "Hello\nWorld\n". // The settings used were based on LZMA_PRESET_DEFAULT as of liblzma 5.6.0. // This assumes MicroLZMA is correct in liblzma 5.6.0, which is safe // considering the encoded "Hello\nWorld\n" can successfully be decoded at // this time. This is to test for regressions that cause MicroLZMA output // to change. static const uint32_t hello_world_encoded_crc = 0x3CDE40A8; // Function implementation borrowed from lzma_decoder.c. It is needed to // ensure the first byte of a MicroLZMA stream is set correctly with the // negation of the LZMA properties. static bool lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) { if (byte > (4 * 5 + 4) * 9 + 8) return true; // See the file format specification to understand this. options->pb = byte / (9 * 5); byte -= options->pb * 9 * 5; options->lp = byte / 9; options->lc = byte - options->lp * 9; return options->lc + options->lp > LZMA_LCLP_MAX; } /////////////////// // Encoder tests // /////////////////// // This tests a few of the basic options. These options are not unique to // MicroLZMA in any way, its mostly ensuring that the options are actually // being checked before initializing the decoder internals. static void test_encode_options(void) { lzma_stream strm = LZMA_STREAM_INIT; lzma_options_lzma opt_lzma; // Initialize with default options. assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); // NULL stream assert_lzma_ret(lzma_microlzma_encoder(NULL, &opt_lzma), LZMA_PROG_ERROR); // lc/lp/pb = 5/0/2 (lc invalid) opt_lzma.lc = 5; opt_lzma.lp = 0; opt_lzma.pb = 2; assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OPTIONS_ERROR); // lc/lp/pb = 0/5/2 (lp invalid) opt_lzma.lc = 0; opt_lzma.lp = 5; opt_lzma.pb = 2; assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OPTIONS_ERROR); // lc/lp/pb = 3/2/2 (lc + lp invalid) opt_lzma.lc = 3; opt_lzma.lp = 2; opt_lzma.pb = 2; assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OPTIONS_ERROR); // lc/lp/pb = 3/0/5 (pb invalid) opt_lzma.lc = 3; opt_lzma.lp = 0; opt_lzma.pb = 5; assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OPTIONS_ERROR); // Zero out lp, pb, lc options to not interfere with later tests. opt_lzma.lp = 0; opt_lzma.pb = 0; opt_lzma.lc = 0; // Set invalid dictionary size. opt_lzma.dict_size = LZMA_DICT_SIZE_MIN - 1; assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OPTIONS_ERROR); // Maximum dictionary size for the encoder, as described in lzma12.h // is 1.5 GiB. opt_lzma.dict_size = (UINT32_C(1) << 30) + (UINT32_C(1) << 29) + 1; assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OPTIONS_ERROR); lzma_end(&strm); } static void test_encode_basic(void) { lzma_stream strm = LZMA_STREAM_INIT; lzma_options_lzma opt_lzma; // The lzma_lzma_preset return value is inverse of what it perhaps // should be, that is, it returns false on success. assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); // Initialize the encoder using the default options. assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK); uint8_t output[BUFFER_SIZE]; strm.next_in = hello_world; strm.avail_in = sizeof(hello_world); strm.next_out = output; strm.avail_out = sizeof(output); // Everything must be encoded in one lzma_code() call. assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END); // Check that the entire input was consumed. assert_uint_eq(strm.total_in, sizeof(hello_world)); // Check that the first byte in the output stream is not 0x00. // In a regular raw LZMA stream the first byte is always 0x00. // In MicroLZMA the first byte replaced by the bitwise-negation // of the LZMA properties. assert_uint(output[0], !=, 0x00); const uint8_t props = ~output[0]; lzma_options_lzma test_options; assert_false(lzma_lzma_lclppb_decode(&test_options, props)); assert_uint_eq(opt_lzma.lc, test_options.lc); assert_uint_eq(opt_lzma.lp, test_options.lp); assert_uint_eq(opt_lzma.pb, test_options.pb); // Compute the check over the output data. This is compared to // the expected check value. const uint32_t check_val = lzma_crc32(output, strm.total_out, 0); assert_uint_eq(check_val, hello_world_encoded_crc); lzma_end(&strm); } // This tests the behavior when strm.avail_out is so small it cannot hold // the header plus 1 encoded byte (< 6). static void test_encode_small_out(void) { lzma_stream strm = LZMA_STREAM_INIT; lzma_options_lzma opt_lzma; assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK); uint8_t output[BUFFER_SIZE]; strm.next_in = hello_world; strm.avail_in = sizeof(hello_world); strm.next_out = output; strm.avail_out = 5; // LZMA_PROG_ERROR is expected when strm.avail_out < 6 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_PROG_ERROR); // The encoder must be reset because coders cannot be used again // after returning LZMA_PROG_ERROR. assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK); // Reset strm.avail_out to be > 6, but not enough to hold all of the // compressed data. strm.avail_out = ENCODED_OUTPUT_SIZE - 1; // Encoding should not return an error now. assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END); assert_uint(strm.total_in, <, sizeof(hello_world)); lzma_end(&strm); } // LZMA_FINISH is the only supported action. All others must // return LZMA_PROG_ERROR. static void test_encode_actions(void) { lzma_stream strm = LZMA_STREAM_INIT; lzma_options_lzma opt_lzma; assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); const lzma_action actions[] = { LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, LZMA_FULL_BARRIER, }; for (size_t i = 0; i < ARRAY_SIZE(actions); ++i) { assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK); uint8_t output[BUFFER_SIZE]; strm.next_in = hello_world; strm.avail_in = sizeof(hello_world); strm.next_out = output; strm.avail_out = sizeof(output); assert_lzma_ret(lzma_code(&strm, actions[i]), LZMA_PROG_ERROR); } lzma_end(&strm); } #endif // HAVE_ENCODER_LZMA1 /////////////////// // Decoder tests // /////////////////// #if defined(HAVE_DECODER_LZMA1) && defined(HAVE_ENCODER_LZMA1) // Byte array of "Goodbye World!". This is used for various decoder tests. static const uint8_t goodbye_world[] = { 0x47, 0x6F, 0x6F, 0x64, 0x62, 0x79, 0x65, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21 }; static uint8_t *goodbye_world_encoded = NULL; static size_t goodbye_world_encoded_size = 0; // Helper function to encode data and return the compressed size. static size_t basic_microlzma_encode(const uint8_t *input, size_t in_size, uint8_t **compressed) { lzma_stream strm = LZMA_STREAM_INIT; lzma_options_lzma opt_lzma; // Lazy way to set the output size since the input should never // inflate by much in these simple test cases. This is tested to // be large enough after encoding to fit the entire input, so if // this assumption does not hold then this will fail. const size_t out_size = in_size << 1; *compressed = tuktest_malloc(out_size); // Always encode with the default options for simplicity. if (lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)) goto decoder_setup_error; if (lzma_microlzma_encoder(&strm, &opt_lzma) != LZMA_OK) goto decoder_setup_error; strm.next_in = input; strm.avail_in = in_size; strm.next_out = *compressed; strm.avail_out = out_size; if (lzma_code(&strm, LZMA_FINISH) != LZMA_STREAM_END) goto decoder_setup_error; // Check that the entire input was consumed and that it fit into // the output buffer. if (strm.total_in != in_size) goto decoder_setup_error; lzma_end(&strm); // lzma_end() doesn't touch other members of lzma_stream than // lzma_stream.internal so using strm.total_out here is fine. return strm.total_out; decoder_setup_error: tuktest_error("Failed to initialize decoder tests"); return 0; } static void test_decode_options(void) { // NULL stream assert_lzma_ret(lzma_microlzma_decoder(NULL, BUFFER_SIZE, sizeof(hello_world), true, LZMA_DICT_SIZE_DEFAULT), LZMA_PROG_ERROR); // Uncompressed size larger than max lzma_stream strm = LZMA_STREAM_INIT; assert_lzma_ret(lzma_microlzma_decoder(&strm, BUFFER_SIZE, LZMA_VLI_MAX + 1, true, LZMA_DICT_SIZE_DEFAULT), LZMA_OPTIONS_ERROR); } // Test that decoding succeeds when uncomp_size is correct regardless of // the value of uncomp_size_is_exact. static void test_decode_uncomp_size_is_exact(void) { lzma_stream strm = LZMA_STREAM_INIT; assert_lzma_ret(lzma_microlzma_decoder(&strm, goodbye_world_encoded_size, sizeof(goodbye_world), true, LZMA_DICT_SIZE_DEFAULT), LZMA_OK); uint8_t output[BUFFER_SIZE]; strm.next_in = goodbye_world_encoded; strm.avail_in = goodbye_world_encoded_size; strm.next_out = output; strm.avail_out = sizeof(output); assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END); assert_uint_eq(strm.total_in, goodbye_world_encoded_size); assert_uint_eq(strm.total_out, sizeof(goodbye_world)); assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); // Reset decoder with uncomp_size_is_exact set to false and // uncomp_size set to correct value. Also test using the // uncompressed size as the dictionary size. assert_lzma_ret(lzma_microlzma_decoder(&strm, goodbye_world_encoded_size, sizeof(goodbye_world), false, sizeof(goodbye_world)), LZMA_OK); strm.next_in = goodbye_world_encoded; strm.avail_in = goodbye_world_encoded_size; strm.next_out = output; strm.avail_out = sizeof(output); assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END); assert_uint_eq(strm.total_in, goodbye_world_encoded_size); assert_uint_eq(strm.total_out, sizeof(goodbye_world)); assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); lzma_end(&strm); } // This tests decoding when MicroLZMA decoder is called with // an incorrect uncompressed size. static void test_decode_uncomp_size_wrong(void) { lzma_stream strm = LZMA_STREAM_INIT; assert_lzma_ret(lzma_microlzma_decoder(&strm, goodbye_world_encoded_size, sizeof(goodbye_world) + 1, false, LZMA_DICT_SIZE_DEFAULT), LZMA_OK); uint8_t output[BUFFER_SIZE]; strm.next_in = goodbye_world_encoded; strm.avail_in = goodbye_world_encoded_size; strm.next_out = output; strm.avail_out = sizeof(output); // LZMA_OK should be returned because the input size given was // larger than the actual encoded size. The decoder is expecting // more input to possibly fill the uncompressed size that was set. assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK); assert_uint_eq(strm.total_out, sizeof(goodbye_world)); assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); // Next, test with uncomp_size_is_exact set. assert_lzma_ret(lzma_microlzma_decoder(&strm, goodbye_world_encoded_size, sizeof(goodbye_world) + 1, true, LZMA_DICT_SIZE_DEFAULT), LZMA_OK); strm.next_in = goodbye_world_encoded; strm.avail_in = goodbye_world_encoded_size; strm.next_out = output; strm.avail_out = sizeof(output); // No error detected, even though all input was consumed and there // is more room in the output buffer. // // FIXME? LZMA_FINISH tells that no more input is coming and // the MicroLZMA decoder knows the exact compressed size from // the initialization as well. So should it return LZMA_DATA_ERROR // on the first call instead of relying on the generic lzma_code() // logic to eventually get LZMA_BUF_ERROR? assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK); assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK); assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_BUF_ERROR); assert_uint_eq(strm.total_out, sizeof(goodbye_world)); assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); // Reset stream with uncomp_size smaller than the real // uncompressed size. assert_lzma_ret(lzma_microlzma_decoder(&strm, goodbye_world_encoded_size, ARRAY_SIZE(hello_world) - 1, true, LZMA_DICT_SIZE_DEFAULT), LZMA_OK); strm.next_in = goodbye_world_encoded; strm.avail_in = goodbye_world_encoded_size; strm.next_out = output; strm.avail_out = sizeof(output); // This case actually results in an error since it decodes the full // uncompressed size but the range coder is not in the proper state // for the stream to end. assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR); lzma_end(&strm); } static void test_decode_comp_size_wrong(void) { lzma_stream strm = LZMA_STREAM_INIT; // goodbye_world_encoded_size + 1 is safe because extra space was // allocated for goodbye_world_encoded. The extra space isn't // initialized but it shouldn't be read either, thus Valgrind // has to remain happy with this code. assert_lzma_ret(lzma_microlzma_decoder(&strm, goodbye_world_encoded_size + 1, sizeof(goodbye_world), true, LZMA_DICT_SIZE_DEFAULT), LZMA_OK); uint8_t output[BUFFER_SIZE]; strm.next_in = goodbye_world_encoded; strm.avail_in = goodbye_world_encoded_size; strm.next_out = output; strm.avail_out = sizeof(output); // When uncomp_size_is_exact is set, the compressed size must be // correct or else LZMA_DATA_ERROR is returned. assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_DATA_ERROR); assert_lzma_ret(lzma_microlzma_decoder(&strm, goodbye_world_encoded_size + 1, sizeof(goodbye_world), false, LZMA_DICT_SIZE_DEFAULT), LZMA_OK); strm.next_in = goodbye_world_encoded; strm.avail_in = goodbye_world_encoded_size; strm.next_out = output; strm.avail_out = sizeof(output); // When uncomp_size_is_exact is not set, the decoder does not // detect when the compressed size is wrong as long as all of the // expected output has been decoded. This is because the decoder // assumes that the real uncompressed size might be bigger than // the specified value and in that case more input might be needed // as well. assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END); lzma_end(&strm); } static void test_decode_bad_lzma_properties(void) { // Alter first byte to encode invalid LZMA properties. uint8_t *compressed = tuktest_malloc(goodbye_world_encoded_size); memcpy(compressed, goodbye_world_encoded, goodbye_world_encoded_size); // lc=3, lp=2, pb=2 compressed[0] = (uint8_t)~0x6FU; lzma_stream strm = LZMA_STREAM_INIT; assert_lzma_ret(lzma_microlzma_decoder(&strm, goodbye_world_encoded_size, sizeof(goodbye_world), false, LZMA_DICT_SIZE_DEFAULT), LZMA_OK); uint8_t output[BUFFER_SIZE]; strm.next_in = compressed; strm.avail_in = goodbye_world_encoded_size; strm.next_out = output; strm.avail_out = sizeof(output); assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_OPTIONS_ERROR); // Use valid, but incorrect LZMA properties. // lc=3, lp=1, pb=2 compressed[0] = (uint8_t)~0x66; assert_lzma_ret(lzma_microlzma_decoder(&strm, goodbye_world_encoded_size, ARRAY_SIZE(goodbye_world), true, LZMA_DICT_SIZE_DEFAULT), LZMA_OK); strm.next_in = compressed; strm.avail_in = goodbye_world_encoded_size; strm.next_out = output; strm.avail_out = sizeof(output); assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR); lzma_end(&strm); } #endif extern int main(int argc, char **argv) { tuktest_start(argc, argv); #ifndef HAVE_ENCODER_LZMA1 tuktest_early_skip("LZMA1 encoder disabled"); #else tuktest_run(test_encode_options); tuktest_run(test_encode_basic); tuktest_run(test_encode_small_out); tuktest_run(test_encode_actions); // MicroLZMA decoder tests require the basic encoder functionality. # ifdef HAVE_DECODER_LZMA1 goodbye_world_encoded_size = basic_microlzma_encode(goodbye_world, sizeof(goodbye_world), &goodbye_world_encoded); tuktest_run(test_decode_options); tuktest_run(test_decode_uncomp_size_is_exact); tuktest_run(test_decode_uncomp_size_wrong); tuktest_run(test_decode_comp_size_wrong); tuktest_run(test_decode_bad_lzma_properties); # endif return tuktest_end(); #endif }