xz/src/liblzma/subblock/subblock_decoder.c

///////////////////////////////////////////////////////////////////////////////
//
/// \file       subblock_decoder.c
/// \brief      Decoder of the Subblock filter
//
//  Copyright (C) 2007 Lasse Collin
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "subblock_decoder.h"
#include "subblock_decoder_helper.h"
#include "raw_decoder.h"


/// Maximum number of consecutive Subblocks with Subblock Type Padding
#define PADDING_MAX 31


struct lzma_coder_s {
	lzma_next_coder next;

	enum {
		// These require that there is at least one input
		// byte available.
		SEQ_FLAGS,
		SEQ_FILTER_FLAGS,
		SEQ_FILTER_END,
		SEQ_REPEAT_COUNT_1,
		SEQ_REPEAT_COUNT_2,
		SEQ_REPEAT_COUNT_3,
		SEQ_REPEAT_SIZE,
		SEQ_REPEAT_READ_DATA,
		SEQ_SIZE_1,
		SEQ_SIZE_2,
		SEQ_SIZE_3, // This must be right before SEQ_DATA.

		// These don't require any input to be available.
		SEQ_DATA,
		SEQ_REPEAT_FAST,
		SEQ_REPEAT_NORMAL,
	} sequence;

	/// Number of bytes left in the current Subblock Data field.
	size_t size;

	/// Number of consecutive Subblocks with Subblock Type Padding
	uint32_t padding;

	/// True when .next.code() has returned LZMA_STREAM_END.
	bool next_finished;

	/// True when the Subblock decoder has detected End of Payload Marker.
	/// This may become true before next_finished becomes true.
	bool this_finished;

	/// True if Subfilters are allowed.
	bool allow_subfilters;

	/// Indicates if at least one byte of decoded output has been
	/// produced after enabling Subfilter.
	bool got_output_with_subfilter;

	/// Possible subfilter
	lzma_next_coder subfilter;

	/// Filter Flags decoder is needed to parse the ID and Properties
	/// of the subfilter.
	lzma_next_coder filter_flags_decoder;

	/// The filter_flags_decoder stores its results here.
	lzma_options_filter filter_flags;

	/// Options for the Subblock decoder helper. This is used to tell
	/// the helper when it should return LZMA_STREAM_END to the subfilter.
	lzma_options_subblock_helper helper;

	struct {
		/// How many times buffer should be repeated
		size_t count;

		/// Size of the buffer
		size_t size;

		/// Position in the buffer
		size_t pos;

		/// Buffer to hold the data to be repeated
		uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
	} repeat;

	/// Temporary buffer needed when the Subblock filter is not the last
	/// filter in the chain. The output of the next filter is first
	/// decoded into buffer[], which is then used as input for the actual
	/// Subblock decoder.
	struct {
		size_t pos;
		size_t size;
		uint8_t buffer[LZMA_BUFFER_SIZE];
	} temp;
};


/// Values of valid Subblock Flags
enum {
	FLAG_PADDING,
	FLAG_EOPM,
	FLAG_DATA,
	FLAG_REPEAT,
	FLAG_SET_SUBFILTER,
	FLAG_END_SUBFILTER,
};


/// Calls the subfilter and updates coder->uncompressed_size.
static lzma_ret
subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *in, size_t *in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size, lzma_action action)
{
	assert(coder->subfilter.code != NULL);

	// Call the subfilter.
	const lzma_ret ret = coder->subfilter.code(
			coder->subfilter.coder, allocator,
			in, in_pos, in_size, out, out_pos, out_size, action);

	return ret;
}


static lzma_ret
decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *in, size_t *in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size, lzma_action action)
{
	while (*out_pos < out_size && (*in_pos < in_size
			|| coder->sequence >= SEQ_DATA))
	switch (coder->sequence) {
	case SEQ_FLAGS: {
		// Do the correct action depending on the Subblock Type.
		switch (in[*in_pos] >> 4) {
		case FLAG_PADDING:
			// Only check that reserved bits are zero.
			if (++coder->padding > PADDING_MAX
					|| in[*in_pos] & 0x0F)
				return LZMA_DATA_ERROR;
			++*in_pos;
			break;

		case FLAG_EOPM:
			// There must be no Padding before EOPM.
			if (coder->padding != 0)
				return LZMA_DATA_ERROR;

			// Check that reserved bits are zero.
			if (in[*in_pos] & 0x0F)
				return LZMA_DATA_ERROR;

			// There must be no Subfilter enabled.
			if (coder->subfilter.code != NULL)
				return LZMA_DATA_ERROR;

			++*in_pos;
			return LZMA_STREAM_END;

		case FLAG_DATA:
			// First four bits of the Subblock Data size.
			coder->size = in[*in_pos] & 0x0F;
			++*in_pos;
			coder->got_output_with_subfilter = true;
			coder->sequence = SEQ_SIZE_1;
			break;

		case FLAG_REPEAT:
			// First four bits of the Repeat Count. We use
			// coder->size as a temporary place for it.
			coder->size = in[*in_pos] & 0x0F;
			++*in_pos;
			coder->got_output_with_subfilter = true;
			coder->sequence = SEQ_REPEAT_COUNT_1;
			break;

		case FLAG_SET_SUBFILTER: {
			if (coder->padding != 0 || (in[*in_pos] & 0x0F)
					|| coder->subfilter.code != NULL
					|| !coder->allow_subfilters)
				return LZMA_DATA_ERROR;

			assert(coder->filter_flags.options == NULL);
			abort();
// 			return_if_error(lzma_filter_flags_decoder_init(
// 					&coder->filter_flags_decoder,
// 					allocator, &coder->filter_flags));

			coder->got_output_with_subfilter = false;

			++*in_pos;
			coder->sequence = SEQ_FILTER_FLAGS;
			break;
		}

		case FLAG_END_SUBFILTER:
			if (coder->padding != 0 || (in[*in_pos] & 0x0F)
					|| coder->subfilter.code == NULL
					|| !coder->got_output_with_subfilter)
				return LZMA_DATA_ERROR;

			// Tell the helper filter to indicate End of Input
			// to our subfilter.
			coder->helper.end_was_reached = true;

			size_t dummy = 0;
			const lzma_ret ret = subfilter_decode(coder, allocator,
					NULL, &dummy, 0, out, out_pos,out_size,
					action);

			// If we didn't reach the end of the subfilter's output
			// yet, return to the application. On the next call we
			// will get to this same switch-case again, because we
			// haven't updated *in_pos yet.
			if (ret != LZMA_STREAM_END)
				return ret;

			// Free Subfilter's memory. This is a bit debatable,
			// since we could avoid some malloc()/free() calls
			// if the same Subfilter gets used soon again. But
			// if Subfilter isn't used again, we could leave
			// a memory-hogging filter dangling until someone
			// frees Subblock filter itself.
			lzma_next_coder_end(&coder->subfilter, allocator);

			// Free memory used for subfilter options. This is
			// safe, because we don't support any Subfilter that
			// would allow pointers in the options structure.
			lzma_free(coder->filter_flags.options, allocator);
			coder->filter_flags.options = NULL;

			++*in_pos;

			break;

		default:
			return LZMA_DATA_ERROR;
		}

		break;
	}

	case SEQ_FILTER_FLAGS: {
		const lzma_ret ret = coder->filter_flags_decoder.code(
				coder->filter_flags_decoder.coder, allocator,
				in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
		if (ret != LZMA_STREAM_END)
			return ret == LZMA_HEADER_ERROR
					? LZMA_DATA_ERROR : ret;

		// Don't free the filter_flags_decoder. It doesn't take much
		// memory and we may need it again.

		// Initialize the Subfilter. Subblock and Copy filters are
		// not allowed.
		if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK)
			return LZMA_DATA_ERROR;

		coder->helper.end_was_reached = false;

		lzma_options_filter filters[3] = {
			{
				.id = coder->filter_flags.id,
				.options = coder->filter_flags.options,
			}, {
				.id = LZMA_FILTER_SUBBLOCK_HELPER,
				.options = &coder->helper,
			}, {
				.id = LZMA_VLI_VALUE_UNKNOWN,
				.options = NULL,
			}
		};

		// Optimization: We know that LZMA uses End of Payload Marker
		// (not End of Input), so we can omit the helper filter.
		if (filters[0].id == LZMA_FILTER_LZMA)
			filters[1].id = LZMA_VLI_VALUE_UNKNOWN;

		return_if_error(lzma_raw_decoder_init(
				&coder->subfilter, allocator, filters));

		coder->sequence = SEQ_FLAGS;
		break;
	}

	case SEQ_FILTER_END:
		// We are in the beginning of a Subblock. The next Subblock
		// whose type is not Padding, must indicate end of Subfilter.
		if (in[*in_pos] == (FLAG_PADDING << 4)) {
			++*in_pos;
			break;
		}

		if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
			return LZMA_DATA_ERROR;

		coder->sequence = SEQ_FLAGS;
		break;

	case SEQ_REPEAT_COUNT_1:
	case SEQ_SIZE_1:
		// We use the same code to parse
		//  - the Size (28 bits) in Subblocks of type Data; and
		//  - the Repeat count (28 bits) in Subblocks of type
		//    Repeating Data.
		coder->size |= (size_t)(in[*in_pos]) << 4;
		++*in_pos;
		++coder->sequence;
		break;

	case SEQ_REPEAT_COUNT_2:
	case SEQ_SIZE_2:
		coder->size |= (size_t)(in[*in_pos]) << 12;
		++*in_pos;
		++coder->sequence;
		break;

	case SEQ_REPEAT_COUNT_3:
	case SEQ_SIZE_3:
		coder->size |= (size_t)(in[*in_pos]) << 20;
		++*in_pos;

		// The real value is the stored value plus one.
		++coder->size;

		// This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
		// SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
		++coder->sequence;
		break;

	case SEQ_REPEAT_SIZE:
		// Move the Repeat Count to the correct variable and parse
		// the Size of the Data to be repeated.
		coder->repeat.count = coder->size;
		coder->repeat.size = (size_t)(in[*in_pos]) + 1;
		coder->repeat.pos = 0;

		// The size of the Data field must be bigger than the number
		// of Padding bytes before this Subblock.
		if (coder->repeat.size <= coder->padding)
			return LZMA_DATA_ERROR;

		++*in_pos;
		coder->padding = 0;
		coder->sequence = SEQ_REPEAT_READ_DATA;
		break;

	case SEQ_REPEAT_READ_DATA: {
		// Fill coder->repeat.buffer[].
		const size_t in_avail = in_size - *in_pos;
		const size_t out_avail
				= coder->repeat.size - coder->repeat.pos;
		const size_t copy_size = MIN(in_avail, out_avail);

		memcpy(coder->repeat.buffer + coder->repeat.pos,
				in + *in_pos, copy_size);
		*in_pos += copy_size;
		coder->repeat.pos += copy_size;

		if (coder->repeat.pos == coder->repeat.size) {
			coder->repeat.pos = 0;

			if (coder->repeat.size == 1
					&& coder->subfilter.code == NULL)
				coder->sequence = SEQ_REPEAT_FAST;
			else
				coder->sequence = SEQ_REPEAT_NORMAL;
		}

		break;
	}

	case SEQ_DATA: {
		// The size of the Data field must be bigger than the number
		// of Padding bytes before this Subblock.
		assert(coder->size > 0);
		if (coder->size <= coder->padding)
			return LZMA_DATA_ERROR;

		coder->padding = 0;

		// Limit the amount of input to match the available
		// Subblock Data size.
		size_t in_limit;
		if (in_size - *in_pos > coder->size)
			in_limit = *in_pos + coder->size;
		else
			in_limit = in_size;

		if (coder->subfilter.code == NULL) {
			const size_t copy_size = bufcpy(
					in, in_pos, in_limit,
					out, out_pos, out_size);

			coder->size -= copy_size;
		} else {
			const size_t in_start = *in_pos;
			const lzma_ret ret = subfilter_decode(
					coder, allocator,
					in, in_pos, in_limit,
					out, out_pos, out_size,
					action);

			// Update the number of unprocessed bytes left in
			// this Subblock. This assert() is true because
			// in_limit prevents *in_pos getting too big.
			assert(*in_pos - in_start <= coder->size);
			coder->size -= *in_pos - in_start;

			if (ret == LZMA_STREAM_END) {
				// End of Subfilter can occur only at
				// a Subblock boundary.
				if (coder->size != 0)
					return LZMA_DATA_ERROR;

				// We need a Subblock with Unset
				// Subfilter before more data.
				coder->sequence = SEQ_FILTER_END;
				break;
			}

			if (ret != LZMA_OK)
				return ret;
		}

		// If we couldn't process the whole Subblock Data yet, return.
		if (coder->size > 0)
			return LZMA_OK;

		coder->sequence = SEQ_FLAGS;
		break;
	}

	case SEQ_REPEAT_FAST: {
		// Optimization for cases when there is only one byte to
		// repeat and no Subfilter.
		const size_t out_avail = out_size - *out_pos;
		const size_t copy_size = MIN(coder->repeat.count, out_avail);

		memset(out + *out_pos, coder->repeat.buffer[0], copy_size);

		*out_pos += copy_size;
		coder->repeat.count -= copy_size;

		if (coder->repeat.count != 0)
			return LZMA_OK;

		coder->sequence = SEQ_FLAGS;
		break;
	}

	case SEQ_REPEAT_NORMAL:
		do {
			// Cycle the repeat buffer if needed.
			if (coder->repeat.pos == coder->repeat.size) {
				if (--coder->repeat.count == 0) {
					coder->sequence = SEQ_FLAGS;
					break;
				}

				coder->repeat.pos = 0;
			}

			if (coder->subfilter.code == NULL) {
				bufcpy(coder->repeat.buffer,
						&coder->repeat.pos,
						coder->repeat.size,
						out, out_pos, out_size);
			} else {
				const lzma_ret ret = subfilter_decode(
						coder, allocator,
						coder->repeat.buffer,
						&coder->repeat.pos,
						coder->repeat.size,
						out, out_pos, out_size,
						action);

				if (ret == LZMA_STREAM_END) {
					// End of Subfilter can occur only at
					// a Subblock boundary.
					if (coder->repeat.pos
							!= coder->repeat.size
							|| --coder->repeat
								.count != 0)
						return LZMA_DATA_ERROR;

					// We need a Subblock with Unset
					// Subfilter before more data.
					coder->sequence = SEQ_FILTER_END;
					break;

				} else if (ret != LZMA_OK) {
					return ret;
				}
			}
		} while (*out_pos < out_size);

		break;

	default:
		return LZMA_PROG_ERROR;
	}

	return LZMA_OK;
}


static lzma_ret
subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size, lzma_action action)
{
	if (coder->next.code == NULL)
		return decode_buffer(coder, allocator, in, in_pos, in_size,
				out, out_pos, out_size, action);

	while (*out_pos < out_size) {
		if (!coder->next_finished
				&& coder->temp.pos == coder->temp.size) {
			coder->temp.pos = 0;
			coder->temp.size = 0;

			const lzma_ret ret = coder->next.code(
					coder->next.coder,
					allocator, in, in_pos, in_size,
					coder->temp.buffer, &coder->temp.size,
					LZMA_BUFFER_SIZE, action);

			if (ret == LZMA_STREAM_END)
				coder->next_finished = true;
			else if (coder->temp.size == 0 || ret != LZMA_OK)
				return ret;
		}

		if (coder->this_finished) {
			if (coder->temp.pos != coder->temp.size)
				return LZMA_DATA_ERROR;

			if (coder->next_finished)
				return LZMA_STREAM_END;

			return LZMA_OK;
		}

		const lzma_ret ret = decode_buffer(coder, allocator,
				coder->temp.buffer, &coder->temp.pos,
				coder->temp.size,
				out, out_pos, out_size, action);

		if (ret == LZMA_STREAM_END)
			// The next coder in the chain hasn't finished
			// yet. If the input data is valid, there
			// must be no more output coming, but the
			// next coder may still need a litle more
			// input to detect End of Payload Marker.
			coder->this_finished = true;
		else if (ret != LZMA_OK)
			return ret;
		else if (coder->next_finished && *out_pos < out_size)
			return LZMA_DATA_ERROR;
	}

	return LZMA_OK;
}


static void
subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
	lzma_next_coder_end(&coder->next, allocator);
	lzma_next_coder_end(&coder->subfilter, allocator);
	lzma_next_coder_end(&coder->filter_flags_decoder, allocator);
	lzma_free(coder->filter_flags.options, allocator);
	lzma_free(coder, allocator);
	return;
}


extern lzma_ret
lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	if (next->coder == NULL) {
		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (next->coder == NULL)
			return LZMA_MEM_ERROR;

		next->code = &subblock_decode;
		next->end = &subblock_decoder_end;

		next->coder->next = LZMA_NEXT_CODER_INIT;
		next->coder->subfilter = LZMA_NEXT_CODER_INIT;
		next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;

	} else {
		lzma_next_coder_end(&next->coder->subfilter, allocator);
		lzma_free(next->coder->filter_flags.options, allocator);
	}

	next->coder->filter_flags.options = NULL;

	next->coder->sequence = SEQ_FLAGS;
	next->coder->padding = 0;
	next->coder->next_finished = false;
	next->coder->this_finished = false;
	next->coder->temp.pos = 0;
	next->coder->temp.size = 0;

	if (filters[0].options != NULL)
		next->coder->allow_subfilters = ((lzma_options_subblock *)(
				filters[0].options))->allow_subfilters;
	else
		next->coder->allow_subfilters = false;

	return lzma_next_filter_init(
			&next->coder->next, allocator, filters + 1);
}