/////////////////////////////////////////////////////////////////////////////// // /// \file subblock_decoder.c /// \brief Decoder of the Subblock filter // // Copyright (C) 2007 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // /////////////////////////////////////////////////////////////////////////////// #include "subblock_decoder.h" #include "subblock_decoder_helper.h" #include "raw_decoder.h" /// Maximum number of consecutive Subblocks with Subblock Type Padding #define PADDING_MAX 31 struct lzma_coder_s { lzma_next_coder next; enum { // These require that there is at least one input // byte available. SEQ_FLAGS, SEQ_FILTER_FLAGS, SEQ_FILTER_END, SEQ_REPEAT_COUNT_1, SEQ_REPEAT_COUNT_2, SEQ_REPEAT_COUNT_3, SEQ_REPEAT_SIZE, SEQ_REPEAT_READ_DATA, SEQ_SIZE_1, SEQ_SIZE_2, SEQ_SIZE_3, // This must be right before SEQ_DATA. // These don't require any input to be available. SEQ_DATA, SEQ_REPEAT_FAST, SEQ_REPEAT_NORMAL, } sequence; /// Number of bytes left in the current Subblock Data field. size_t size; /// Uncompressed Size, or LZMA_VLI_VALUE_UNKNOWN if unknown. lzma_vli uncompressed_size; /// Number of consecutive Subblocks with Subblock Type Padding uint32_t padding; /// True when .next.code() has returned LZMA_STREAM_END. bool next_finished; /// True when the Subblock decoder has detected End of Payload Marker. /// This may become true before next_finished becomes true. bool this_finished; /// True if Subfilters are allowed. bool allow_subfilters; /// Indicates if at least one byte of decoded output has been /// produced after enabling Subfilter. bool got_output_with_subfilter; /// Possible subfilter lzma_next_coder subfilter; /// Filter Flags decoder is needed to parse the ID and Properties /// of the subfilter. lzma_next_coder filter_flags_decoder; /// The filter_flags_decoder stores its results here. lzma_options_filter filter_flags; /// Options for the Subblock decoder helper. This is used to tell /// the helper when it should return LZMA_STREAM_END to the subfilter. lzma_options_subblock_helper helper; struct { /// How many times buffer should be repeated size_t count; /// Size of the buffer size_t size; /// Position in the buffer size_t pos; /// Buffer to hold the data to be repeated uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX]; } repeat; /// Temporary buffer needed when the Subblock filter is not the last /// filter in the chain. The output of the next filter is first /// decoded into buffer[], which is then used as input for the actual /// Subblock decoder. struct { size_t pos; size_t size; uint8_t buffer[LZMA_BUFFER_SIZE]; } temp; }; /// Values of valid Subblock Flags enum { FLAG_PADDING, FLAG_EOPM, FLAG_DATA, FLAG_REPEAT, FLAG_SET_SUBFILTER, FLAG_END_SUBFILTER, }; /// Substracts size from coder->uncompressed_size uncompressed size is known /// and size isn't bigger than coder->uncompressed_size. static inline bool update_uncompressed_size(lzma_coder *coder, size_t size) { if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { if ((lzma_vli)(size) > coder->uncompressed_size) return true; coder->uncompressed_size -= size; } return false; } /// Calls the subfilter and updates coder->uncompressed_size. static lzma_ret subfilter_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { assert(coder->subfilter.code != NULL); const size_t out_start = *out_pos; // Call the subfilter. const lzma_ret ret = coder->subfilter.code( coder->subfilter.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); // Update uncompressed_size. if (update_uncompressed_size(coder, *out_pos - out_start)) return LZMA_DATA_ERROR; return ret; } static lzma_ret decode_buffer(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { while (*out_pos < out_size && (*in_pos < in_size || coder->sequence >= SEQ_DATA)) switch (coder->sequence) { case SEQ_FLAGS: { if ((in[*in_pos] >> 4) != FLAG_PADDING) coder->padding = 0; // Do the correct action depending on the Subblock Type. switch (in[*in_pos] >> 4) { case FLAG_PADDING: // Only check that reserved bits are zero. if (++coder->padding > PADDING_MAX || in[*in_pos] & 0x0F) return LZMA_DATA_ERROR; ++*in_pos; break; case FLAG_EOPM: // Check that reserved bits are zero. if (in[*in_pos] & 0x0F) return LZMA_DATA_ERROR; // There must be no Subfilter enabled. if (coder->subfilter.code != NULL) return LZMA_DATA_ERROR; // End of Payload Marker must not be used if // uncompressed size is known. if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) return LZMA_DATA_ERROR; ++*in_pos; return LZMA_STREAM_END; case FLAG_DATA: // First four bits of the Subblock Data size. coder->size = in[*in_pos] & 0x0F; ++*in_pos; coder->got_output_with_subfilter = true; coder->sequence = SEQ_SIZE_1; break; case FLAG_REPEAT: // First four bits of the Repeat Count. We use // coder->size as a temporary place for it. coder->size = in[*in_pos] & 0x0F; ++*in_pos; coder->got_output_with_subfilter = true; coder->sequence = SEQ_REPEAT_COUNT_1; break; case FLAG_SET_SUBFILTER: { if ((in[*in_pos] & 0x0F) || coder->subfilter.code != NULL || !coder->allow_subfilters) return LZMA_DATA_ERROR; assert(coder->filter_flags.options == NULL); return_if_error(lzma_filter_flags_decoder_init( &coder->filter_flags_decoder, allocator, &coder->filter_flags)); coder->got_output_with_subfilter = false; ++*in_pos; coder->sequence = SEQ_FILTER_FLAGS; break; } case FLAG_END_SUBFILTER: if (coder->subfilter.code == NULL || !coder->got_output_with_subfilter) return LZMA_DATA_ERROR; // Tell the helper filter to indicate End of Input // to our subfilter. coder->helper.end_was_reached = true; size_t dummy = 0; const lzma_ret ret = subfilter_decode(coder, allocator, NULL, &dummy, 0, out, out_pos,out_size, action); // If we didn't reach the end of the subfilter's output // yet, return to the application. On the next call we // will get to this same switch-case again, because we // haven't updated *in_pos yet. if (ret != LZMA_STREAM_END) return ret; // Free Subfilter's memory. This is a bit debatable, // since we could avoid some malloc()/free() calls // if the same Subfilter gets used soon again. But // if Subfilter isn't used again, we could leave // a memory-hogging filter dangling until someone // frees Subblock filter itself. lzma_next_coder_end(&coder->subfilter, allocator); // Free memory used for subfilter options. This is // safe, because we don't support any Subfilter that // would allow pointers in the options structure. lzma_free(coder->filter_flags.options, allocator); coder->filter_flags.options = NULL; ++*in_pos; if (coder->uncompressed_size == 0) return LZMA_STREAM_END; break; default: return LZMA_DATA_ERROR; } break; } case SEQ_FILTER_FLAGS: { const lzma_ret ret = coder->filter_flags_decoder.code( coder->filter_flags_decoder.coder, allocator, in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); if (ret != LZMA_STREAM_END) return ret == LZMA_HEADER_ERROR ? LZMA_DATA_ERROR : ret; // Don't free the filter_flags_decoder. It doesn't take much // memory and we may need it again. // Initialize the Subfilter. Subblock and Copy filters are // not allowed. if (coder->filter_flags.id == LZMA_FILTER_COPY || coder->filter_flags.id == LZMA_FILTER_SUBBLOCK) return LZMA_DATA_ERROR; coder->helper.end_was_reached = false; lzma_options_filter filters[3] = { { .id = coder->filter_flags.id, .options = coder->filter_flags.options, }, { .id = LZMA_FILTER_SUBBLOCK_HELPER, .options = &coder->helper, }, { .id = LZMA_VLI_VALUE_UNKNOWN, .options = NULL, } }; // Optimization: We know that LZMA uses End of Payload Marker // (not End of Input), so we can omit the helper filter. if (filters[0].id == LZMA_FILTER_LZMA) filters[1].id = LZMA_VLI_VALUE_UNKNOWN; return_if_error(lzma_raw_decoder_init( &coder->subfilter, allocator, filters, LZMA_VLI_VALUE_UNKNOWN, false)); coder->sequence = SEQ_FLAGS; break; } case SEQ_FILTER_END: // We are in the beginning of a Subblock. The next Subblock // whose type is not Padding, must indicate end of Subfilter. if (in[*in_pos] == (FLAG_PADDING << 4)) { ++*in_pos; break; } if (in[*in_pos] != (FLAG_END_SUBFILTER << 4)) return LZMA_DATA_ERROR; coder->sequence = SEQ_FLAGS; break; case SEQ_REPEAT_COUNT_1: case SEQ_SIZE_1: // We use the same code to parse // - the Size (28 bits) in Subblocks of type Data; and // - the Repeat count (28 bits) in Subblocks of type // Repeating Data. coder->size |= (size_t)(in[*in_pos]) << 4; ++*in_pos; ++coder->sequence; break; case SEQ_REPEAT_COUNT_2: case SEQ_SIZE_2: coder->size |= (size_t)(in[*in_pos]) << 12; ++*in_pos; ++coder->sequence; break; case SEQ_REPEAT_COUNT_3: case SEQ_SIZE_3: coder->size |= (size_t)(in[*in_pos]) << 20; ++*in_pos; // The real value is the stored value plus one. ++coder->size; // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence. ++coder->sequence; break; case SEQ_REPEAT_SIZE: // Move the Repeat Count to the correct variable and parse // the Size of the Data to be repeated. coder->repeat.count = coder->size; coder->repeat.size = (size_t)(in[*in_pos]) + 1; coder->repeat.pos = 0; ++*in_pos; coder->sequence = SEQ_REPEAT_READ_DATA; break; case SEQ_REPEAT_READ_DATA: { // Fill coder->repeat.buffer[]. const size_t in_avail = in_size - *in_pos; const size_t out_avail = coder->repeat.size - coder->repeat.pos; const size_t copy_size = MIN(in_avail, out_avail); memcpy(coder->repeat.buffer + coder->repeat.pos, in + *in_pos, copy_size); *in_pos += copy_size; coder->repeat.pos += copy_size; if (coder->repeat.pos == coder->repeat.size) { coder->repeat.pos = 0; if (coder->repeat.size == 1 && coder->subfilter.code == NULL) coder->sequence = SEQ_REPEAT_FAST; else coder->sequence = SEQ_REPEAT_NORMAL; } break; } case SEQ_DATA: { // Limit the amount of input to match the available // Subblock Data size. size_t in_limit; if (in_size - *in_pos > coder->size) in_limit = *in_pos + coder->size; else in_limit = in_size; if (coder->subfilter.code == NULL) { const size_t copy_size = bufcpy( in, in_pos, in_limit, out, out_pos, out_size); coder->size -= copy_size; if (update_uncompressed_size(coder, copy_size)) return LZMA_DATA_ERROR; } else { const size_t in_start = *in_pos; const lzma_ret ret = subfilter_decode( coder, allocator, in, in_pos, in_limit, out, out_pos, out_size, action); // Update the number of unprocessed bytes left in // this Subblock. This assert() is true because // in_limit prevents *in_pos getting too big. assert(*in_pos - in_start <= coder->size); coder->size -= *in_pos - in_start; if (ret == LZMA_STREAM_END) { // End of Subfilter can occur only at // a Subblock boundary. if (coder->size != 0) return LZMA_DATA_ERROR; // We need a Subblock with Unset // Subfilter before more data. coder->sequence = SEQ_FILTER_END; break; } if (ret != LZMA_OK) return ret; } // If we couldn't process the whole Subblock Data yet, return. if (coder->size > 0) return LZMA_OK; // Check if we have decoded all the data. if (coder->uncompressed_size == 0 && coder->subfilter.code == NULL) return LZMA_STREAM_END; coder->sequence = SEQ_FLAGS; break; } case SEQ_REPEAT_FAST: { // Optimization for cases when there is only one byte to // repeat and no Subfilter. const size_t out_avail = out_size - *out_pos; const size_t copy_size = MIN(coder->repeat.count, out_avail); memset(out + *out_pos, coder->repeat.buffer[0], copy_size); *out_pos += copy_size; coder->repeat.count -= copy_size; if (update_uncompressed_size(coder, copy_size)) return LZMA_DATA_ERROR; if (coder->repeat.count == 0) { assert(coder->subfilter.code == NULL); if (coder->uncompressed_size == 0) return LZMA_STREAM_END; } else { return LZMA_OK; } coder->sequence = SEQ_FLAGS; break; } case SEQ_REPEAT_NORMAL: do { // Cycle the repeat buffer if needed. if (coder->repeat.pos == coder->repeat.size) { if (--coder->repeat.count == 0) { coder->sequence = SEQ_FLAGS; break; } coder->repeat.pos = 0; } if (coder->subfilter.code == NULL) { const size_t copy_size = bufcpy( coder->repeat.buffer, &coder->repeat.pos, coder->repeat.size, out, out_pos, out_size); if (update_uncompressed_size(coder, copy_size)) return LZMA_DATA_ERROR; } else { const lzma_ret ret = subfilter_decode( coder, allocator, coder->repeat.buffer, &coder->repeat.pos, coder->repeat.size, out, out_pos, out_size, action); if (ret == LZMA_STREAM_END) { // End of Subfilter can occur only at // a Subblock boundary. if (coder->repeat.pos != coder->repeat.size || --coder->repeat .count != 0) return LZMA_DATA_ERROR; // We need a Subblock with Unset // Subfilter before more data. coder->sequence = SEQ_FILTER_END; break; } else if (ret != LZMA_OK) { return ret; } } } while (*out_pos < out_size); // Check if we have decoded all the data. if (coder->uncompressed_size == 0 && coder->subfilter.code == NULL) return LZMA_STREAM_END; break; default: return LZMA_PROG_ERROR; } return LZMA_OK; } static lzma_ret subblock_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { if (coder->next.code == NULL) return decode_buffer(coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); while (*out_pos < out_size) { if (!coder->next_finished && coder->temp.pos == coder->temp.size) { coder->temp.pos = 0; coder->temp.size = 0; const lzma_ret ret = coder->next.code( coder->next.coder, allocator, in, in_pos, in_size, coder->temp.buffer, &coder->temp.size, LZMA_BUFFER_SIZE, action); if (ret == LZMA_STREAM_END) coder->next_finished = true; else if (coder->temp.size == 0 || ret != LZMA_OK) return ret; } if (coder->this_finished) { if (coder->temp.pos != coder->temp.size) return LZMA_DATA_ERROR; if (coder->next_finished) return LZMA_STREAM_END; return LZMA_OK; } const lzma_ret ret = decode_buffer(coder, allocator, coder->temp.buffer, &coder->temp.pos, coder->temp.size, out, out_pos, out_size, action); if (ret == LZMA_STREAM_END) // The next coder in the chain hasn't finished // yet. If the input data is valid, there // must be no more output coming, but the // next coder may still need a litle more // input to detect End of Payload Marker. coder->this_finished = true; else if (ret != LZMA_OK) return ret; else if (coder->next_finished && *out_pos < out_size) return LZMA_DATA_ERROR; } return LZMA_OK; } static void subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator) { lzma_next_coder_end(&coder->next, allocator); lzma_next_coder_end(&coder->subfilter, allocator); lzma_next_coder_end(&coder->filter_flags_decoder, allocator); lzma_free(coder->filter_flags.options, allocator); lzma_free(coder, allocator); return; } extern lzma_ret lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, const lzma_filter_info *filters) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); if (next->coder == NULL) return LZMA_MEM_ERROR; next->code = &subblock_decode; next->end = &subblock_decoder_end; next->coder->next = LZMA_NEXT_CODER_INIT; next->coder->subfilter = LZMA_NEXT_CODER_INIT; next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT; } else { lzma_next_coder_end(&next->coder->subfilter, allocator); lzma_free(next->coder->filter_flags.options, allocator); } next->coder->filter_flags.options = NULL; next->coder->sequence = SEQ_FLAGS; next->coder->uncompressed_size = filters[0].uncompressed_size; next->coder->padding = 0; next->coder->next_finished = false; next->coder->this_finished = false; next->coder->temp.pos = 0; next->coder->temp.size = 0; if (filters[0].options != NULL) next->coder->allow_subfilters = ((lzma_options_subblock *)( filters[0].options))->allow_subfilters; else next->coder->allow_subfilters = false; return lzma_next_filter_init( &next->coder->next, allocator, filters + 1); }