xz/src/liblzma/lzma/lzma_encoder.c

///////////////////////////////////////////////////////////////////////////////
//
/// \file       lzma_encoder.c
/// \brief      LZMA encoder
//
//  Copyright (C) 1999-2006 Igor Pavlov
//  Copyright (C) 2007 Lasse Collin
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

// NOTE: If you want to keep the line length in 80 characters, set
//       tab width to 4 or less in your editor when editing this file.


#include "lzma_encoder_private.h"
#include "fastpos.h"


////////////
// Macros //
////////////

// These are as macros mostly because they use local range encoder variables.

/* Encodes the eight bits of `symbol` as a plain literal, most significant
 * bit first. The probability used for each bit is subcoder[model_index],
 * where model_index starts at 1 and has every already encoded bit shifted
 * in from the right.
 *
 * Expands bit_encode(), so whatever local range encoder variables that
 * macro needs must be in scope at the point of use. `symbol` is evaluated
 * once per bit and must not have side effects. */
#define literal_encode(subcoder, symbol) \
do { \
	uint32_t model_index = 1; \
	for (int shift = 7; shift >= 0; --shift) { \
		const uint32_t bit = ((symbol) >> shift) & 1; \
		bit_encode(subcoder[model_index], bit); \
		model_index = (model_index << 1) | bit; \
	} \
} while (0)


/* Encodes the eight bits of `symbol` as a literal, most significant bit
 * first, using `match_byte` as extra context.
 *
 * While every bit encoded so far has been equal to the corresponding bit
 * of `match_byte`, the probability is taken from
 * subcoder[0x100 + (match_bit << 8) + model_index]. Starting with the bit
 * after the first mismatch, the remaining bits are encoded exactly like
 * literal_encode() does, i.e. with subcoder[model_index].
 *
 * Expands bit_encode(), so whatever local range encoder variables that
 * macro needs must be in scope at the point of use. The arguments are
 * evaluated several times and must not have side effects. */
#define literal_encode_matched(subcoder, match_byte, symbol) \
do { \
	uint32_t model_index = 1; \
	bool still_matching = true; \
	for (int shift = 7; shift >= 0; --shift) { \
		const uint32_t bit = ((symbol) >> shift) & 1; \
		if (still_matching) { \
			const uint32_t match_bit = ((match_byte) >> shift) & 1; \
			const uint32_t matched_index \
					= 0x100 + (match_bit << 8) + model_index; \
			bit_encode(subcoder[matched_index], bit); \
			still_matching = (match_bit == bit); \
		} else { \
			bit_encode(subcoder[model_index], bit); \
		} \
		model_index = (model_index << 1) | bit; \
	} \
} while (0)


/* Encodes a match length with the given length encoder.
 *
 * length_encoder  Length encoder structure (an lvalue, not a pointer).
 * symbol          Match length minus MATCH_MIN_LEN. Since match lengths are
 *                 in the range [MATCH_MIN_LEN, MATCH_MAX_LEN], the valid
 *                 range here is [0, MATCH_MAX_LEN - MATCH_MIN_LEN].
 * pos_state       Selects which low/mid bit tree and which price counter
 *                 is used.
 * update_price    If true, the per-pos_state counter is decremented and
 *                 the price table is rebuilt with
 *                 lzma_length_encoder_update_table() when it hits zero.
 *
 * The symbol is coded with one or two choice bits followed by a bit tree:
 *   - choice = 0:               low[pos_state], LEN_LOW_BITS bits
 *   - choice = 1, choice2 = 0:  mid[pos_state], LEN_MID_BITS bits
 *   - choice = 1, choice2 = 1:  high, LEN_HIGH_BITS bits
 *
 * Expands the range encoder macros, so whatever local variables those need
 * must be in scope at the point of use. The arguments are evaluated several
 * times and must not have side effects. */
#define length_encode(length_encoder, symbol, pos_state, update_price) \
do { \
	assert((symbol) <= MATCH_MAX_LEN - MATCH_MIN_LEN); \
	if ((symbol) < LEN_LOW_SYMBOLS) { \
		bit_encode_0((length_encoder).choice); \
		bittree_encode((length_encoder).low[(pos_state)], \
				LEN_LOW_BITS, (symbol)); \
	} else { \
		bit_encode_1((length_encoder).choice); \
		if ((symbol) < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS) { \
			bit_encode_0((length_encoder).choice2); \
			bittree_encode((length_encoder).mid[(pos_state)], \
					LEN_MID_BITS, \
					(symbol) - LEN_LOW_SYMBOLS); \
		} else { \
			bit_encode_1((length_encoder).choice2); \
			bittree_encode((length_encoder).high, LEN_HIGH_BITS, \
					(symbol) - LEN_LOW_SYMBOLS \
					- LEN_MID_SYMBOLS); \
		} \
	} \
	if (update_price) { \
		if (--(length_encoder).counters[(pos_state)] == 0) { \
			lzma_length_encoder_update_table( \
					&(length_encoder), (pos_state)); \
		} \
	} \
} while (0)


///////////////
// Functions //
///////////////

/// \brief      Rebuilds the length price table of one position state
///
/// Fills lencoder->prices[pos_state] with a price for each of the first
/// lencoder->table_size length symbols and then reloads
/// lencoder->counters[pos_state] with that symbol count.
///
/// The price of a symbol is the price of the choice bit(s) that select the
/// low, mid, or high bit tree plus the price of the symbol inside that tree.
/// Like all the other prices in LZMA, these are used by lzma_get_optimum().
///
extern void
lzma_length_encoder_update_table(lzma_length_encoder *lencoder,
		const uint32_t pos_state)
{
	const uint32_t table_size = lencoder->table_size;
	uint32_t *const table = lencoder->prices[pos_state];

	// Prices of the bits that tell which bit tree holds the symbol.
	const uint32_t choice_is_1 = bit_get_price_1(lencoder->choice);
	const uint32_t low_base = bit_get_price_0(lencoder->choice);
	const uint32_t mid_base
			= choice_is_1 + bit_get_price_0(lencoder->choice2);
	const uint32_t high_base
			= choice_is_1 + bit_get_price_1(lencoder->choice2);

	for (uint32_t symbol = 0; symbol < table_size; ++symbol) {
		if (symbol < LEN_LOW_SYMBOLS) {
			table[symbol] = low_base + bittree_get_price(
					lencoder->low[pos_state],
					LEN_LOW_BITS, symbol);
		} else if (symbol < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS) {
			table[symbol] = mid_base + bittree_get_price(
					lencoder->mid[pos_state],
					LEN_MID_BITS, symbol - LEN_LOW_SYMBOLS);
		} else {
			table[symbol] = high_base + bittree_get_price(
					lencoder->high, LEN_HIGH_BITS,
					symbol - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS);
		}
	}

	// Restart the countdown to the next table rebuild.
	lencoder->counters[pos_state] = table_size;
}


/**
 * \brief       LZMA encoder
 *
 * Encodes input that is available in coder->lz as literals, matches, and
 * repeated matches until the output buffer is full or there is nothing
 * more to encode. When flushing or finishing and all input has been
 * consumed, optionally writes a flush or end of payload marker and then
 * flushes the range encoder.
 *
 * \param       coder       Encoder state. The range encoder state is copied
 *                          to a local variable on entry and stored back
 *                          before the final return.
 * \param       out         Output buffer. It is not referenced by name in
 *                          this function; NOTE(review): presumably the
 *                          range encoder macros write to it -- confirm.
 * \param       out_pos     In: position in out where writing starts.
 *                          Out: updated position.
 * \param       out_size    Size of the out buffer; encoding stops when the
 *                          local output position reaches this value.
 *
 * \return      true if coder->lz.sequence is not SEQ_RUN, all the input has
 *              been encoded, and the range encoder has been flushed;
 *              false otherwise.
 */
extern bool
lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size)
{
// NOTE(review): these names are presumably expected by the range encoder
// macros used below; they are never #undef'd in this file.
#define rc_buffer coder->lz.temp
#define rc_buffer_size coder->lz.temp_size

	// Local copies
	lzma_range_encoder rc = coder->rc;
	size_t out_pos_local = *out_pos;
	const uint32_t pos_mask = coder->pos_mask;
	const bool best_compression = coder->best_compression;

	// Initialize the stream if no data has been encoded yet.
	if (!coder->is_initialized) {
		if (coder->lz.read_pos == coder->lz.read_limit) {
			// Nothing has been modified yet, so returning without
			// storing the local copies back is fine here.
			if (coder->lz.sequence == SEQ_RUN)
				return false; // We cannot do anything.

			// We are finishing (we cannot get here when flushing).
			assert(coder->lz.write_pos == coder->lz.read_pos);
			assert(coder->lz.sequence == SEQ_FINISH);
		} else {
			// Do the actual initialization.
			// The values stored to len and num_distance_pairs are not
			// used here. NOTE(review): presumably the call is needed to
			// move the match finder past the first byte -- confirm.
			uint32_t len;
			uint32_t num_distance_pairs;
			lzma_read_match_distances(coder, &len, &num_distance_pairs);

			// The first byte is always encoded as a plain literal
			// using pos_state zero.
			bit_encode_0(coder->is_match[coder->state][0]);
			update_literal(coder->state);

			const uint8_t cur_byte = coder->lz.buffer[
					coder->lz.read_pos - coder->additional_offset];
			probability *subcoder = literal_get_subcoder(coder->literal_coder,
					coder->now_pos, coder->previous_byte);
			literal_encode(subcoder, cur_byte);

			coder->previous_byte = cur_byte;
			--coder->additional_offset;
			++coder->now_pos;

			assert(coder->additional_offset == 0);
		}

		// Initialization is done (except if empty file).
		coder->is_initialized = true;
	}

	// Encoding loop
	while (true) {
		// Check that there is free output space.
		if (out_pos_local == out_size)
			break;

		assert(rc_buffer_size == 0);

		// Check that there is some input to process.
		if (coder->lz.read_pos >= coder->lz.read_limit) {
			// If flushing or finishing, we must keep encoding
			// until additional_offset becomes zero to make
			// all the input available at output.
			if (coder->lz.sequence == SEQ_RUN
					|| coder->additional_offset == 0)
				break;
		}

		assert(coder->lz.read_pos <= coder->lz.write_pos);

#ifndef NDEBUG
		// Sanity check the relation between read_limit and write_pos.
		if (coder->lz.sequence != SEQ_RUN) {
			assert(coder->lz.read_limit == coder->lz.write_pos);
		} else {
			assert(coder->lz.read_limit + coder->lz.keep_size_after
					== coder->lz.write_pos);
		}
#endif

		const uint32_t pos_state = coder->now_pos & pos_mask;

		uint32_t pos;
		uint32_t len;

		// Get optimal match (repeat position and length).
		// Value ranges for pos:
		//   - [0, REP_DISTANCES): repeated match
		//   - [REP_DISTANCES, UINT32_MAX): match at (pos - REP_DISTANCES)
		//   - UINT32_MAX: not a match but a literal
		// Value ranges for len:
		//   - [MATCH_MIN_LEN, MATCH_MAX_LEN]
		if (best_compression)
			lzma_get_optimum(coder, &pos, &len);
		else
			lzma_get_optimum_fast(coder, &pos, &len);

		if (len == 1 && pos == UINT32_MAX) {
			// It's a literal.
			bit_encode_0(coder->is_match[coder->state][pos_state]);

			const uint8_t cur_byte = coder->lz.buffer[
					coder->lz.read_pos - coder->additional_offset];
			probability *subcoder = literal_get_subcoder(coder->literal_coder,
					coder->now_pos, coder->previous_byte);

			if (is_literal_state(coder->state)) {
				literal_encode(subcoder, cur_byte);
			} else {
				// Use the byte that is rep_distances[0] + 1 bytes
				// before cur_byte as additional context.
				const uint8_t match_byte = coder->lz.buffer[
						coder->lz.read_pos
						- coder->rep_distances[0] - 1
						- coder->additional_offset];
				literal_encode_matched(subcoder, match_byte, cur_byte);
			}

			update_literal(coder->state);
			coder->previous_byte = cur_byte;

		} else {
			// It's a match.
			bit_encode_1(coder->is_match[coder->state][pos_state]);

			if (pos < REP_DISTANCES) {
				// It's a repeated match i.e. the same distance
				// has been used earlier.
				bit_encode_1(coder->is_rep[coder->state]);

				if (pos == 0) {
					// The most recent distance is reused, so
					// rep_distances[] stays as is. The is_rep0_long bit
					// tells a one-byte repeat (0) from a longer one (1).
					bit_encode_0(coder->is_rep0[coder->state]);
					const uint32_t symbol = (len == 1) ? 0 : 1;
					bit_encode(coder->is_rep0_long[coder->state][pos_state],
							symbol);
				} else {
					// Encode which of the older distances is reused and
					// move that distance to the front of rep_distances[],
					// shifting the more recent ones back by one.
					const uint32_t distance = coder->rep_distances[pos];
					bit_encode_1(coder->is_rep0[coder->state]);

					if (pos == 1) {
						bit_encode_0(coder->is_rep1[coder->state]);
					} else {
						// pos is 2 or 3 here; is_rep2 tells which.
						bit_encode_1(coder->is_rep1[coder->state]);
						bit_encode(coder->is_rep2[coder->state], pos - 2);

						if (pos == 3)
							coder->rep_distances[3] = coder->rep_distances[2];

						coder->rep_distances[2] = coder->rep_distances[1];
					}

					coder->rep_distances[1] = coder->rep_distances[0];
					coder->rep_distances[0] = distance;
				}

				if (len == 1) {
					// One-byte repeated match: no length is encoded.
					update_short_rep(coder->state);
				} else {
					length_encode(coder->rep_len_encoder,
							len - MATCH_MIN_LEN, pos_state,
							best_compression);
					update_long_rep(coder->state);
				}

			} else {
				// It's a normal match: encode the length first and
				// then the distance.
				bit_encode_0(coder->is_rep[coder->state]);
				update_match(coder->state);
				length_encode(coder->match_len_encoder, len - MATCH_MIN_LEN,
						pos_state, best_compression);
				// Drop the offset that separates normal matches from
				// repeated matches in the value returned via pos.
				pos -= REP_DISTANCES;

				const uint32_t pos_slot = get_pos_slot(pos);
				const uint32_t len_to_pos_state = get_len_to_pos_state(len);
				bittree_encode(coder->pos_slot_encoder[len_to_pos_state],
						POS_SLOT_BITS, pos_slot);

				// For slots below START_POS_MODEL_INDEX nothing more
				// is encoded after the slot itself.
				if (pos_slot >= START_POS_MODEL_INDEX) {
					// The slot gives the two highest bits of the distance
					// (base); the remaining footer_bits low bits
					// (pos_reduced) are encoded separately.
					const uint32_t footer_bits = (pos_slot >> 1) - 1;
					const uint32_t base = (2 | (pos_slot & 1)) << footer_bits;
					const uint32_t pos_reduced = pos - base;

					if (pos_slot < END_POS_MODEL_INDEX) {
						// All the footer bits go through a reverse
						// bit tree.
						bittree_reverse_encode(
								coder->pos_encoders + base - pos_slot - 1,
								footer_bits, pos_reduced);
					} else {
						// The highest footer bits are encoded as direct
						// bits and the lowest ALIGN_BITS bits with the
						// align reverse bit tree.
						rc_encode_direct_bits(pos_reduced >> ALIGN_BITS,
								footer_bits - ALIGN_BITS);
						bittree_reverse_encode(coder->pos_align_encoder,
								ALIGN_BITS, pos_reduced & ALIGN_MASK);
						++coder->align_price_count;
					}
				}

				// Push the new distance to the front of rep_distances[],
				// dropping the oldest one.
				coder->rep_distances[3] = coder->rep_distances[2];
				coder->rep_distances[2] = coder->rep_distances[1];
				coder->rep_distances[1] = coder->rep_distances[0];
				coder->rep_distances[0] = pos;
				++coder->match_price_count;
			}

			// Remember the last byte covered by the match.
			coder->previous_byte = coder->lz.buffer[
					coder->lz.read_pos + len - 1
					- coder->additional_offset];
		}

		// len bytes of input have now been encoded.
		assert(coder->additional_offset >= len);
		coder->additional_offset -= len;
		coder->now_pos += len;
	}

	// Check if everything is done.
	bool all_done = false;
	if (coder->lz.sequence != SEQ_RUN
			&& coder->lz.read_pos == coder->lz.write_pos
			&& coder->additional_offset == 0) {
		assert(coder->longest_match_was_found == false);

		if (coder->lz.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN
				|| coder->lz.sequence == SEQ_FLUSH) {
			// Write special marker: flush marker or end of payload
			// marker. Both are encoded as a match with distance of
			// UINT32_MAX. The match length codes the type of the marker.
			const uint32_t pos_state = coder->now_pos & pos_mask;
			bit_encode_1(coder->is_match[coder->state][pos_state]);
			bit_encode_0(coder->is_rep[coder->state]);
			update_match(coder->state);

			const uint32_t len = coder->lz.sequence == SEQ_FLUSH
					? LEN_SPECIAL_FLUSH : LEN_SPECIAL_EOPM;
			length_encode(coder->match_len_encoder, len - MATCH_MIN_LEN,
					pos_state, best_compression);

			// The distance is encoded with all bits set: the largest
			// value that fits in POS_SLOT_BITS bits as the slot...
			const uint32_t pos_slot = (1 << POS_SLOT_BITS) - 1;
			const uint32_t len_to_pos_state = get_len_to_pos_state(len);
			bittree_encode(coder->pos_slot_encoder[len_to_pos_state],
					POS_SLOT_BITS, pos_slot);

			// ...followed by 30 footer bits that are all ones, split
			// into direct bits and align bits like in a normal match.
			const uint32_t footer_bits = 30;
			const uint32_t pos_reduced
					= (UINT32_C(1) << footer_bits) - 1;
			rc_encode_direct_bits(pos_reduced >> ALIGN_BITS,
					footer_bits - ALIGN_BITS);

			bittree_reverse_encode(coder->pos_align_encoder, ALIGN_BITS,
					pos_reduced & ALIGN_MASK);
		}

		// Flush the last bytes of compressed data from
		// the range coder to the output buffer.
		rc_flush();

		// NOTE(review): presumably puts the local range encoder back
		// to its initial state so that encoding can continue after
		// a flush -- confirm against the range encoder header.
		rc_reset(rc);

		// All done. Note that some output bytes might be
		// pending in coder->lz.temp. lzma_lz_encode() will
		// take care of those bytes.
		all_done = true;
	}

	// Store local variables back to *coder.
	coder->rc = rc;
	*out_pos = out_pos_local;

	return all_done;
}
Imported to git. 2007-12-08 22:42:33 +00:00			`///////////////////////////////////////////////////////////////////////////////`
			`//`
			`/// \file lzma_encoder.c`
			`/// \brief LZMA encoder`
			`//`
			`// Copyright (C) 1999-2006 Igor Pavlov`
			`// Copyright (C) 2007 Lasse Collin`
			`//`
			`// This library is free software; you can redistribute it and/or`
			`// modify it under the terms of the GNU Lesser General Public`
			`// License as published by the Free Software Foundation; either`
			`// version 2.1 of the License, or (at your option) any later version.`
			`//`
			`// This library is distributed in the hope that it will be useful,`
			`// but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`// Lesser General Public License for more details.`
			`//`
			`///////////////////////////////////////////////////////////////////////////////`

			`// NOTE: If you want to keep the line length in 80 characters, set`
			`// tab width to 4 or less in your editor when editing this file.`


			`#include "lzma_encoder_private.h"`
Revised the fastpos code. It now uses the slightly faster table-based version from LZMA SDK 4.57. This should be fast on most systems. A simpler and smaller alternative version is also provided. On some CPUs this can be even a little faster than the default table-based version (see comments in fastpos.h), but on most systems the table-based code is faster. 2008-01-15 12:02:22 +00:00			`#include "fastpos.h"`
Imported to git. 2007-12-08 22:42:33 +00:00

			`////////////`
			`// Macros //`
			`////////////`

			`// These are as macros mostly because they use local range encoder variables.`

			`#define literal_encode(subcoder, symbol) \`
			`do { \`
			`uint32_t context = 1; \`
			`int i = 8; \`
			`do { \`
			`--i; \`
			`const uint32_t bit = ((symbol) >> i) & 1; \`
			`bit_encode(subcoder[context], bit); \`
			`context = (context << 1) \| bit; \`
			`} while (i != 0); \`
			`} while (0)`


			`#define literal_encode_matched(subcoder, match_byte, symbol) \`
			`do { \`
			`uint32_t context = 1; \`
			`int i = 8; \`
			`do { \`
			`--i; \`
			`uint32_t bit = ((symbol) >> i) & 1; \`
			`const uint32_t match_bit = ((match_byte) >> i) & 1; \`
			`const uint32_t subcoder_index = 0x100 + (match_bit << 8) + context; \`
			`bit_encode(subcoder[subcoder_index], bit); \`
			`context = (context << 1) \| bit; \`
			`if (match_bit != bit) { \`
			`while (i != 0) { \`
			`--i; \`
			`bit = ((symbol) >> i) & 1; \`
			`bit_encode(subcoder[context], bit); \`
			`context = (context << 1) \| bit; \`
			`} \`
			`break; \`
			`} \`
			`} while (i != 0); \`
			`} while (0)`


			`#define length_encode(length_encoder, symbol, pos_state, update_price) \`
			`do { \`
Added two assert()s. 2008-04-24 17:19:20 +00:00			`assert((symbol) <= MATCH_MAX_LEN); \`
Imported to git. 2007-12-08 22:42:33 +00:00			`if ((symbol) < LEN_LOW_SYMBOLS) { \`
			`bit_encode_0((length_encoder).choice); \`
			`bittree_encode((length_encoder).low[pos_state], \`
			`LEN_LOW_BITS, symbol); \`
			`} else { \`
			`bit_encode_1((length_encoder).choice); \`
			`if ((symbol) < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS) { \`
			`bit_encode_0((length_encoder).choice2); \`
			`bittree_encode((length_encoder).mid[pos_state], \`
			`LEN_MID_BITS, \`
			`(symbol) - LEN_LOW_SYMBOLS); \`
			`} else { \`
			`bit_encode_1((length_encoder).choice2); \`
			`bittree_encode((length_encoder).high, LEN_HIGH_BITS, \`
			`(symbol) - LEN_LOW_SYMBOLS \`
			`- LEN_MID_SYMBOLS); \`
			`} \`
			`} \`
			`if (update_price) \`
			`if (--(length_encoder).counters[pos_state] == 0) \`
			`lzma_length_encoder_update_table(&(length_encoder), pos_state); \`
			`} while (0)`


			`///////////////`
			`// Functions //`
			`///////////////`

			`/// \brief Updates price table of the length encoder`
			`///`
Fix a typo in lzma_encoder.c. 2008-01-15 06:37:42 +00:00			`/// Like all the other prices in LZMA, these are used by lzma_get_optimum().`
Imported to git. 2007-12-08 22:42:33 +00:00			`///`
			`extern void`
			`lzma_length_encoder_update_table(lzma_length_encoder *lencoder,`
			`const uint32_t pos_state)`
			`{`
			`const uint32_t num_symbols = lencoder->table_size;`
			`const uint32_t a0 = bit_get_price_0(lencoder->choice);`
			`const uint32_t a1 = bit_get_price_1(lencoder->choice);`
			`const uint32_t b0 = a1 + bit_get_price_0(lencoder->choice2);`
			`const uint32_t b1 = a1 + bit_get_price_1(lencoder->choice2);`

			`uint32_t *prices = lencoder->prices[pos_state];`
			`uint32_t i = 0;`

Convert bittree_get_price() and bittree_reverse_get_price() from macros to inline functions. 2008-01-15 06:36:25 +00:00			`for (i = 0; i < num_symbols && i < LEN_LOW_SYMBOLS; ++i)`
			`prices[i] = a0 + bittree_get_price(lencoder->low[pos_state],`
Imported to git. 2007-12-08 22:42:33 +00:00			`LEN_LOW_BITS, i);`

Convert bittree_get_price() and bittree_reverse_get_price() from macros to inline functions. 2008-01-15 06:36:25 +00:00			`for (; i < num_symbols && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i)`
			`prices[i] = b0 + bittree_get_price(lencoder->mid[pos_state],`
Imported to git. 2007-12-08 22:42:33 +00:00			`LEN_MID_BITS, i - LEN_LOW_SYMBOLS);`

Convert bittree_get_price() and bittree_reverse_get_price() from macros to inline functions. 2008-01-15 06:36:25 +00:00			`for (; i < num_symbols; ++i)`
			`prices[i] = b1 + bittree_get_price(`
			`lencoder->high, LEN_HIGH_BITS,`
Imported to git. 2007-12-08 22:42:33 +00:00			`i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS);`

			`lencoder->counters[pos_state] = num_symbols;`

			`return;`
			`}`


			`/**`
			`* \brief LZMA encoder`
			`*`
			`* \return true if end of stream was reached, false otherwise.`
			`*/`
			`extern bool`
			`lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,`
			`size_t *restrict out_pos, size_t out_size)`
			`{`
Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`#define rc_buffer coder->lz.temp`
			`#define rc_buffer_size coder->lz.temp_size`
Imported to git. 2007-12-08 22:42:33 +00:00
			`// Local copies`
Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`lzma_range_encoder rc = coder->rc;`
Imported to git. 2007-12-08 22:42:33 +00:00			`size_t out_pos_local = *out_pos;`
			`const uint32_t pos_mask = coder->pos_mask;`
			`const bool best_compression = coder->best_compression;`

			`// Initialize the stream if no data has been encoded yet.`
			`if (!coder->is_initialized) {`
			`if (coder->lz.read_pos == coder->lz.read_limit) {`
Fix data corruption in LZMA encoder. Note that this bug was specific to liblzma and was not present in LZMA SDK. 2008-03-14 21:16:11 +00:00			`if (coder->lz.sequence == SEQ_RUN)`
			`return false; // We cannot do anything.`

			`// We are finishing (we cannot get here when flushing).`
Fix LZMA_SYNC_FLUSH handling in LZ and LZMA encoders. That code is now almost completely in LZ coder, where it can be shared with other LZ77-based algorithms in future. 2008-01-18 18:02:52 +00:00			`assert(coder->lz.write_pos == coder->lz.read_pos);`
			`assert(coder->lz.sequence == SEQ_FINISH);`
Imported to git. 2007-12-08 22:42:33 +00:00			`} else {`
			`// Do the actual initialization.`
			`uint32_t len;`
			`uint32_t num_distance_pairs;`
			`lzma_read_match_distances(coder, &len, &num_distance_pairs);`

			`bit_encode_0(coder->is_match[coder->state][0]);`
Demystified the "state" variable in LZMA code. Use the word literal instead of char for better consistency. There are still some names with _char instead of _literal in lzma_optimum, these may be changed later. Renamed length coder variables. This commit doesn't change the program logic. 2008-03-21 22:57:33 +00:00			`update_literal(coder->state);`
Imported to git. 2007-12-08 22:42:33 +00:00
			`const uint8_t cur_byte = coder->lz.buffer[`
			`coder->lz.read_pos - coder->additional_offset];`
			`probability *subcoder = literal_get_subcoder(coder->literal_coder,`
			`coder->now_pos, coder->previous_byte);`
			`literal_encode(subcoder, cur_byte);`

			`coder->previous_byte = cur_byte;`
			`--coder->additional_offset;`
			`++coder->now_pos;`

			`assert(coder->additional_offset == 0);`
			`}`

			`// Initialization is done (except if empty file).`
			`coder->is_initialized = true;`
			`}`

			`// Encoding loop`
			`while (true) {`
			`// Check that there is free output space.`
			`if (out_pos_local == out_size)`
			`break;`

			`assert(rc_buffer_size == 0);`

			`// Check that there is some input to process.`
			`if (coder->lz.read_pos >= coder->lz.read_limit) {`
Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`// If flushing or finishing, we must keep encoding`
			`// until additional_offset becomes zero to make`
			`// all the input available at output.`
			`if (coder->lz.sequence == SEQ_RUN`
Imported to git. 2007-12-08 22:42:33 +00:00			`\|\| coder->additional_offset == 0)`
			`break;`
			`}`

			`assert(coder->lz.read_pos <= coder->lz.write_pos);`

			`#ifndef NDEBUG`
Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`if (coder->lz.sequence != SEQ_RUN) {`
Imported to git. 2007-12-08 22:42:33 +00:00			`assert(coder->lz.read_limit == coder->lz.write_pos);`
			`} else {`
			`assert(coder->lz.read_limit + coder->lz.keep_size_after`
			`== coder->lz.write_pos);`
			`}`
			`#endif`

			`const uint32_t pos_state = coder->now_pos & pos_mask;`

			`uint32_t pos;`
			`uint32_t len;`

			`// Get optimal match (repeat position and length).`
			`// Value ranges for pos:`
			`// - [0, REP_DISTANCES): repeated match`
			`// - [REP_DISTANCES, UINT32_MAX): match at (pos - REP_DISTANCES)`
			`// - UINT32_MAX: not a match but a literal`
			`// Value ranges for len:`
			`// - [MATCH_MIN_LEN, MATCH_MAX_LEN]`
			`if (best_compression)`
			`lzma_get_optimum(coder, &pos, &len);`
			`else`
			`lzma_get_optimum_fast(coder, &pos, &len);`

			`if (len == 1 && pos == UINT32_MAX) {`
			`// It's a literal.`
			`bit_encode_0(coder->is_match[coder->state][pos_state]);`

			`const uint8_t cur_byte = coder->lz.buffer[`
			`coder->lz.read_pos - coder->additional_offset];`
			`probability *subcoder = literal_get_subcoder(coder->literal_coder,`
			`coder->now_pos, coder->previous_byte);`

Demystified the "state" variable in LZMA code. Use the word literal instead of char for better consistency. There are still some names with _char instead of _literal in lzma_optimum, these may be changed later. Renamed length coder variables. This commit doesn't change the program logic. 2008-03-21 22:57:33 +00:00			`if (is_literal_state(coder->state)) {`
Imported to git. 2007-12-08 22:42:33 +00:00			`literal_encode(subcoder, cur_byte);`
			`} else {`
			`const uint8_t match_byte = coder->lz.buffer[`
			`coder->lz.read_pos`
			`- coder->rep_distances[0] - 1`
			`- coder->additional_offset];`
			`literal_encode_matched(subcoder, match_byte, cur_byte);`
			`}`

Demystified the "state" variable in LZMA code. Use the word literal instead of char for better consistency. There are still some names with _char instead of _literal in lzma_optimum, these may be changed later. Renamed length coder variables. This commit doesn't change the program logic. 2008-03-21 22:57:33 +00:00			`update_literal(coder->state);`
Imported to git. 2007-12-08 22:42:33 +00:00			`coder->previous_byte = cur_byte;`

			`} else {`
			`// It's a match.`
			`bit_encode_1(coder->is_match[coder->state][pos_state]);`

			`if (pos < REP_DISTANCES) {`
			`// It's a repeated match i.e. the same distance`
			`// has been used earlier.`
			`bit_encode_1(coder->is_rep[coder->state]);`

			`if (pos == 0) {`
			`bit_encode_0(coder->is_rep0[coder->state]);`
			`const uint32_t symbol = (len == 1) ? 0 : 1;`
			`bit_encode(coder->is_rep0_long[coder->state][pos_state],`
			`symbol);`
			`} else {`
			`const uint32_t distance = coder->rep_distances[pos];`
			`bit_encode_1(coder->is_rep0[coder->state]);`

			`if (pos == 1) {`
			`bit_encode_0(coder->is_rep1[coder->state]);`
			`} else {`
			`bit_encode_1(coder->is_rep1[coder->state]);`
			`bit_encode(coder->is_rep2[coder->state], pos - 2);`

			`if (pos == 3)`
			`coder->rep_distances[3] = coder->rep_distances[2];`

			`coder->rep_distances[2] = coder->rep_distances[1];`
			`}`

			`coder->rep_distances[1] = coder->rep_distances[0];`
			`coder->rep_distances[0] = distance;`
			`}`

			`if (len == 1) {`
			`update_short_rep(coder->state);`
			`} else {`
Demystified the "state" variable in LZMA code. Use the word literal instead of char for better consistency. There are still some names with _char instead of _literal in lzma_optimum, these may be changed later. Renamed length coder variables. This commit doesn't change the program logic. 2008-03-21 22:57:33 +00:00			`length_encode(coder->rep_len_encoder,`
Imported to git. 2007-12-08 22:42:33 +00:00			`len - MATCH_MIN_LEN, pos_state,`
			`best_compression);`
Demystified the "state" variable in LZMA code. Use the word literal instead of char for better consistency. There are still some names with _char instead of _literal in lzma_optimum, these may be changed later. Renamed length coder variables. This commit doesn't change the program logic. 2008-03-21 22:57:33 +00:00			`update_long_rep(coder->state);`
Imported to git. 2007-12-08 22:42:33 +00:00			`}`

			`} else {`
			`bit_encode_0(coder->is_rep[coder->state]);`
			`update_match(coder->state);`
Demystified the "state" variable in LZMA code. Use the word literal instead of char for better consistency. There are still some names with _char instead of _literal in lzma_optimum, these may be changed later. Renamed length coder variables. This commit doesn't change the program logic. 2008-03-21 22:57:33 +00:00			`length_encode(coder->match_len_encoder, len - MATCH_MIN_LEN,`
Imported to git. 2007-12-08 22:42:33 +00:00			`pos_state, best_compression);`
			`pos -= REP_DISTANCES;`

			`const uint32_t pos_slot = get_pos_slot(pos);`
			`const uint32_t len_to_pos_state = get_len_to_pos_state(len);`
			`bittree_encode(coder->pos_slot_encoder[len_to_pos_state],`
			`POS_SLOT_BITS, pos_slot);`

			`if (pos_slot >= START_POS_MODEL_INDEX) {`
			`const uint32_t footer_bits = (pos_slot >> 1) - 1;`
			`const uint32_t base = (2 \| (pos_slot & 1)) << footer_bits;`
			`const uint32_t pos_reduced = pos - base;`

			`if (pos_slot < END_POS_MODEL_INDEX) {`
			`bittree_reverse_encode(`
			`coder->pos_encoders + base - pos_slot - 1,`
			`footer_bits, pos_reduced);`
			`} else {`
			`rc_encode_direct_bits(pos_reduced >> ALIGN_BITS,`
			`footer_bits - ALIGN_BITS);`
			`bittree_reverse_encode(coder->pos_align_encoder,`
			`ALIGN_BITS, pos_reduced & ALIGN_MASK);`
			`++coder->align_price_count;`
			`}`
			`}`

			`coder->rep_distances[3] = coder->rep_distances[2];`
			`coder->rep_distances[2] = coder->rep_distances[1];`
			`coder->rep_distances[1] = coder->rep_distances[0];`
			`coder->rep_distances[0] = pos;`
			`++coder->match_price_count;`
			`}`

			`coder->previous_byte = coder->lz.buffer[`
			`coder->lz.read_pos + len - 1`
			`- coder->additional_offset];`
			`}`

			`assert(coder->additional_offset >= len);`
			`coder->additional_offset -= len;`
			`coder->now_pos += len;`
			`}`

			`// Check if everything is done.`
			`bool all_done = false;`
Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`if (coder->lz.sequence != SEQ_RUN`
Imported to git. 2007-12-08 22:42:33 +00:00			`&& coder->lz.read_pos == coder->lz.write_pos`
			`&& coder->additional_offset == 0) {`
Added two assert()s. 2008-04-24 17:19:20 +00:00			`assert(coder->longest_match_was_found == false);`

Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`if (coder->lz.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN`
			`\|\| coder->lz.sequence == SEQ_FLUSH) {`
			`// Write special marker: flush marker or end of payload`
			`// marker. Both are encoded as a match with distance of`
			`// UINT32_MAX. The match length codes the type of the marker.`
Imported to git. 2007-12-08 22:42:33 +00:00			`const uint32_t pos_state = coder->now_pos & pos_mask;`
			`bit_encode_1(coder->is_match[coder->state][pos_state]);`
			`bit_encode_0(coder->is_rep[coder->state]);`
			`update_match(coder->state);`

Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`const uint32_t len = coder->lz.sequence == SEQ_FLUSH`
			`? LEN_SPECIAL_FLUSH : LEN_SPECIAL_EOPM;`
Demystified the "state" variable in LZMA code. Use the word literal instead of char for better consistency. There are still some names with _char instead of _literal in lzma_optimum, these may be changed later. Renamed length coder variables. This commit doesn't change the program logic. 2008-03-21 22:57:33 +00:00			`length_encode(coder->match_len_encoder, len - MATCH_MIN_LEN,`
Imported to git. 2007-12-08 22:42:33 +00:00			`pos_state, best_compression);`

			`const uint32_t pos_slot = (1 << POS_SLOT_BITS) - 1;`
			`const uint32_t len_to_pos_state = get_len_to_pos_state(len);`
			`bittree_encode(coder->pos_slot_encoder[len_to_pos_state],`
			`POS_SLOT_BITS, pos_slot);`

			`const uint32_t footer_bits = 30;`
			`const uint32_t pos_reduced`
			`= (UINT32_C(1) << footer_bits) - 1;`
			`rc_encode_direct_bits(pos_reduced >> ALIGN_BITS,`
			`footer_bits - ALIGN_BITS);`

			`bittree_reverse_encode(coder->pos_align_encoder, ALIGN_BITS,`
			`pos_reduced & ALIGN_MASK);`
			`}`

			`// Flush the last bytes of compressed data from`
			`// the range coder to the output buffer.`
			`rc_flush();`

Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`rc_reset(rc);`

Imported to git. 2007-12-08 22:42:33 +00:00			`// All done. Note that some output bytes might be`
Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`// pending in coder->lz.temp. lzma_lz_encode() will`
Imported to git. 2007-12-08 22:42:33 +00:00			`// take care of those bytes.`
Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`all_done = true;`
Imported to git. 2007-12-08 22:42:33 +00:00			`}`

			`// Store local variables back to *coder.`
Major changes to LZ encoder, LZMA encoder, and range encoder. These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. 2008-01-14 11:39:54 +00:00			`coder->rc = rc;`
Imported to git. 2007-12-08 22:42:33 +00:00			`*out_pos = out_pos_local;`

			`return all_done;`
			`}`