From e141fe18950400faaa3503ff88ac20eacd73e88c Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sat, 19 Jan 2008 21:16:33 +0200 Subject: [PATCH] Implemented LZMA_SYNC_FLUSH support to the Subblock encoder. The API for handing Subfilters was changed to make it consistent with LZMA_SYNC_FLUSH. A few sanity checks were added for Subfilter handling. Some small bugs were fixed. More comments were added. --- src/liblzma/api/lzma/subblock.h | 29 ++- src/liblzma/subblock/subblock_encoder.c | 263 ++++++++++++++++++------ 2 files changed, 214 insertions(+), 78 deletions(-) diff --git a/src/liblzma/api/lzma/subblock.h b/src/liblzma/api/lzma/subblock.h index 0474b6af..1db35b13 100644 --- a/src/liblzma/api/lzma/subblock.h +++ b/src/liblzma/api/lzma/subblock.h @@ -95,9 +95,10 @@ typedef struct { * input_offset % alignment == output_offset % alignment * * The Subblock filter assumes that the first output byte will be - * written to a position in the output stream that is properly aligned. - * - * FIXME desc + * written to a position in the output stream that is properly + * aligned. This requirement is automatically met when the start + * offset of the Stream or Block is correctly told to Block or + * Stream encoder. */ uint32_t alignment; # define LZMA_SUBBLOCK_ALIGNMENT_MIN 1 @@ -161,16 +162,17 @@ typedef struct { * * When subfilter_mode is LZMA_SUBFILTER_NONE, the application may * put Subfilter options to subfilter_options structure, and then - * set subfilter_mode to LZMA_SUBFILTER_SET. This implies setting - * flush to true. No new input data will be read until the Subfilter - * has been enabled. Once the Subfilter has been enabled, liblzma - * will set subfilter_mode to LZMA_SUBFILTER_RUN. + * set subfilter_mode to LZMA_SUBFILTER_SET. No new input data will + * be read until the Subfilter has been enabled. Once the Subfilter + * has been enabled, liblzma will set subfilter_mode to + * LZMA_SUBFILTER_RUN. * * When subfilter_mode is LZMA_SUBFILTER_RUN, the application may - * set subfilter_mode to LZMA_SUBFILTER_FINISH. No new input data - * will be read until the Subfilter has been finished. Once the - * Subfilter has been finished, liblzma will set subfilter_mode - * to LZMA_SUBFILTER_NONE. + * set subfilter_mode to LZMA_SUBFILTER_FINISH. All the input + * currently available will be encoded before unsetting the + * Subfilter. Application must not change the amount of available + * input until the Subfilter has finished. Once the Subfilter has + * finished, liblzma will set subfilter_mode to LZMA_SUBFILTER_NONE. * * If the intent is to have Subfilter enabled to the very end of * the data, it is not needed to separately disable Subfilter with @@ -178,6 +180,11 @@ typedef struct { * of lzma_code() will make the Subblock encoder to disable the * Subfilter once all the data has been ran through the Subfilter. * + * After the first call with LZMA_SYNC_FLUSH or LZMA_FINISH, the + * application must not change subfilter_mode until LZMA_STREAM_END. + * Setting LZMA_SUBFILTER_SET/LZMA_SUBFILTER_FINISH and + * LZMA_SYNC_FLUSH/LZMA_FINISH _at the same time_ is fine. + * * \note This variable is ignored if allow_subfilters is false. */ lzma_subfilter_mode subfilter_mode; diff --git a/src/liblzma/subblock/subblock_encoder.c b/src/liblzma/subblock/subblock_encoder.c index 96129d87..d033ea2c 100644 --- a/src/liblzma/subblock/subblock_encoder.c +++ b/src/liblzma/subblock/subblock_encoder.c @@ -3,7 +3,7 @@ /// \file subblock_encoder.c /// \brief Encoder of the Subblock filter // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007, 2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -61,27 +61,57 @@ struct lzma_coder_s { SEQ_SUBFILTER_FLAGS, } sequence; + /// Pointer to the options given by the application. This is used + /// for two-way communication with the application. lzma_options_subblock *options; + /// Position in various arrays. size_t pos; + + /// Holds subblock.size - 1 or rle.size - 1 when encoding size + /// of Data or Repeat Count. uint32_t tmp; struct { + /// This is a copy of options->alignment, or + /// LZMA_SUBBLOCK_ALIGNMENT_DEFAULT if options is NULL. uint32_t multiple; + + /// Number of input bytes that we have already read but + /// not yet started writing out. uint32_t in_pending; + + /// Number of input bytes which we have processed and started + /// writing out. 32-bit integer is enough since we care only + /// about the lowest bits when fixing alignment. uint32_t in_pos; + + /// Number of bytes written out. uint32_t out_pos; } alignment; struct { + /// Pointer to allocated buffer holding the Data field + /// of Subblock Type "Data". uint8_t *data; + + /// Number of bytes in the buffer. size_t size; + + /// Allocated size of the buffer. size_t limit; } subblock; struct { + /// Buffer to hold the data that may be coded with + /// Subblock Type `Repeating Data'. uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX]; + + /// Number of bytes in buffer[]. size_t size; + + /// Number of times the first `size' bytes of buffer[] + /// will be repeated. lzma_vli count; } rle; @@ -90,15 +120,38 @@ struct lzma_coder_s { SUB_NONE, SUB_SET, SUB_RUN, + SUB_FLUSH, SUB_FINISH, SUB_END_MARKER, } mode; + /// This is a copy of options->allow_subfilters. We use + /// this to verify that the application doesn't change + /// the value of allow_subfilters. + bool allow; + + /// When this is true, application is not allowed to modify + /// options->subblock_mode. We may still modify it here. + bool mode_locked; + + /// True if we have encoded at least one byte of data with + /// the Subfilter. bool got_input; + /// Track the amount of input available once + /// LZMA_SUBFILTER_FINISH has been enabled. + /// This is needed for sanity checking (kind + /// of duplicating what common/code.c does). + size_t in_avail; + + /// Buffer for the Filter Flags field written after + /// the `Set Subfilter' indicator. uint8_t *flags; + + /// Size of Filter Flags field. uint32_t flags_size; + /// Pointers to Subfilter. lzma_next_coder subcoder; } subfilter; @@ -234,8 +287,16 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + // Changing allow_subfilter is not allowed. + if (coder->options != NULL && coder->subfilter.allow + != coder->options->allow_subfilters) + return LZMA_PROG_ERROR; + // Check if we need to do something special with the Subfilter. - if (coder->options != NULL && coder->options->allow_subfilters) { + if (coder->subfilter.allow) { + assert(coder->options != NULL); + + // See if subfilter_mode has been changed. switch (coder->options->subfilter_mode) { case LZMA_SUBFILTER_NONE: if (coder->subfilter.mode != SUB_NONE) @@ -243,8 +304,9 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, break; case LZMA_SUBFILTER_SET: - if (coder->subfilter.mode != SUB_NONE) - return LZMA_HEADER_ERROR; + if (coder->subfilter.mode_locked + || coder->subfilter.mode != SUB_NONE) + return LZMA_PROG_ERROR; coder->subfilter.mode = SUB_SET; coder->subfilter.got_input = false; @@ -257,28 +319,47 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, case LZMA_SUBFILTER_RUN: if (coder->subfilter.mode != SUB_RUN) return LZMA_PROG_ERROR; + break; - case LZMA_SUBFILTER_FINISH: - if (coder->subfilter.mode == SUB_RUN) + case LZMA_SUBFILTER_FINISH: { + const size_t in_avail = in_size - *in_pos; + + if (coder->subfilter.mode == SUB_RUN) { + if (coder->subfilter.mode_locked) + return LZMA_PROG_ERROR; + coder->subfilter.mode = SUB_FINISH; - else if (coder->subfilter.mode != SUB_FINISH) - return LZMA_PROG_ERROR; + coder->subfilter.in_avail = in_avail; - if (!coder->subfilter.got_input) + } else if (coder->subfilter.mode != SUB_FINISH + || coder->subfilter.in_avail + != in_avail) { return LZMA_PROG_ERROR; + } break; + } default: return LZMA_HEADER_ERROR; } + + // If we are sync-flushing or finishing, the application may + // no longer change subfilter_mode. Note that this check is + // done after checking the new subfilter_mode above; this + // way the application may e.g. set LZMA_SUBFILTER_SET and + // LZMA_SYNC_FLUSH at the same time, but it cannot modify + // subfilter_mode on the later lzma_code() calls before + // we have returned LZMA_STREAM_END. + if (action != LZMA_RUN) + coder->subfilter.mode_locked = true; } // Main loop while (*out_pos < out_size) switch (coder->sequence) { - case SEQ_FILL: { + case SEQ_FILL: // Grab the new Subblock Data Size and reallocate the buffer. if (coder->subblock.size == 0 && coder->options != NULL && coder->options->subblock_data_size @@ -297,44 +378,77 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, &coder->subblock.size, coder->subblock.limit); - } else { - const size_t in_start = *in_pos; - lzma_ret ret; + // If we ran out of input before the whole buffer + // was filled, return to application. + if (coder->subblock.size < coder->subblock.limit + && action == LZMA_RUN) + return LZMA_OK; - if (coder->subfilter.mode == SUB_FINISH) { - // Let the Subfilter write out pending data, - // but don't give it any new input anymore. - size_t dummy = 0; - ret = coder->subfilter.subcoder.code(coder - ->subfilter.subcoder.coder, - allocator, NULL, &dummy, 0, - coder->subblock.data, - &coder->subblock.size, - coder->subblock.limit, - LZMA_FINISH); - } else { - // Give our input data to the Subfilter. Note - // that action can be LZMA_FINISH. In that - // case, we filter everything until the end - // of the input. The application isn't required - // to separately set LZMA_SUBBLOCK_FINISH. - ret = coder->subfilter.subcoder.code(coder - ->subfilter.subcoder.coder, - allocator, in, in_pos, in_size, - coder->subblock.data, - &coder->subblock.size, - coder->subblock.limit, - action); + } else { + assert(coder->options->subfilter_mode + != LZMA_SUBFILTER_SET); + + // Using LZMA_FINISH automatically toggles + // LZMA_SUBFILTER_FINISH. + // + // NOTE: It is possible that application had set + // LZMA_SUBFILTER_SET and LZMA_FINISH at the same + // time. In that case it is possible that we will + // cycle to LZMA_SUBFILTER_RUN, LZMA_SUBFILTER_FINISH, + // and back to LZMA_SUBFILTER_NONE in a single + // Subblock encoder function call. + if (action == LZMA_FINISH) { + coder->options->subfilter_mode + = LZMA_SUBFILTER_FINISH; + coder->subfilter.mode = SUB_FINISH; } - const size_t in_used = *in_pos - in_start; + const size_t in_start = *in_pos; + const lzma_ret ret = coder->subfilter.subcoder.code( + coder->subfilter.subcoder.coder, + allocator, in, in_pos, in_size, + coder->subblock.data, + &coder->subblock.size, + coder->subblock.limit, + coder->subfilter.mode == SUB_FINISH + ? LZMA_FINISH : action); + + const size_t in_used = *in_pos - in_start; + coder->alignment.in_pending += in_used; if (in_used > 0) coder->subfilter.got_input = true; - coder->alignment.in_pending += in_used; + coder->subfilter.in_avail = in_size - *in_pos; if (ret == LZMA_STREAM_END) { + // All currently available input must have + // been processed. + assert(*in_pos == in_size); + + // Flush now. Even if coder->subblock.size + // happened to be zero, we still need to go + // to SEQ_FLUSH to possibly finish RLE or + // write the Subfilter Unset indicator. + coder->sequence = SEQ_FLUSH; + + if (coder->subfilter.mode == SUB_RUN) { + // Flushing with Subfilter enabled. + assert(action == LZMA_SYNC_FLUSH); + coder->subfilter.mode = SUB_FLUSH; + break; + } + + // Subfilter finished its job. + assert(coder->subfilter.mode == SUB_FINISH + || action == LZMA_FINISH); + + // At least one byte of input must have been + // encoded with the Subfilter. This is + // required by the file format specification. + if (!coder->subfilter.got_input) + return LZMA_PROG_ERROR; + // We don't strictly need to do this, but // doing it sounds like a good idea, because // otherwise the Subfilter's memory could be @@ -343,35 +457,30 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, lzma_next_coder_end(&coder->subfilter.subcoder, allocator); - assert(coder->options != NULL); - coder->options->subfilter_mode - = LZMA_SUBFILTER_NONE; - - assert(coder->subfilter.mode == SUB_FINISH - || action == LZMA_FINISH); + // We need to flush the currently buffered + // data and write Unset Subfilter marker. + // Note that we cannot set + // coder->options->subfilter_mode to + // LZMA_SUBFILTER_NONE yet, because we + // haven't written the Unset Subfilter + // marker yet. coder->subfilter.mode = SUB_END_MARKER; - - // Flush now. Even if coder->subblock.size - // happens to be zero, we still need to go - // to SEQ_FLUSH to write the Subfilter Unset - // indicator. coder->sequence = SEQ_FLUSH; break; } - // Return if an error occurred. - if (ret != LZMA_OK) + // Return if we couldn't fill the buffer or + // if an error occurred. + if (coder->subblock.size < coder->subblock.limit + || ret != LZMA_OK) return ret; } - // If we ran out of input before the whole buffer - // was filled, return to application. - if (coder->subblock.size < coder->subblock.limit - && action != LZMA_FINISH) - return LZMA_OK; - coder->sequence = SEQ_FLUSH; - } + + // SEQ_FILL doesn't produce any output so falling through + // to SEQ_FLUSH is safe. + assert(*out_pos < out_size); // Fall through @@ -471,21 +580,33 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, break; } - write_byte(0x50); + coder->options->subfilter_mode = LZMA_SUBFILTER_NONE; coder->subfilter.mode = SUB_NONE; + + write_byte(0x50); if (*out_pos == out_size) return LZMA_OK; } // Check if we have already written everything. - if (action == LZMA_FINISH && *in_pos == in_size - && coder->subfilter.mode == SUB_NONE) { + if (action != LZMA_RUN && *in_pos == in_size + && (coder->subfilter.mode == SUB_NONE + || coder->subfilter.mode == SUB_FLUSH)) { if (coder->rle.count > 0) { subblock_rle_flush(coder); break; } - if (coder->use_eopm) { + if (action == LZMA_SYNC_FLUSH) { + if (coder->subfilter.mode == SUB_FLUSH) + coder->subfilter.mode = SUB_RUN; + + coder->subfilter.mode_locked = false; + coder->sequence = SEQ_FILL; + + } else if (coder->use_eopm) { + assert(action == LZMA_FINISH); + // NOTE: No need to use write_byte() here // since we are finishing. out[*out_pos] = 0x10; @@ -586,7 +707,6 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, return LZMA_OK; coder->alignment.out_pos += coder->subblock.size; - coder->subblock.size = 0; coder->pos = 0; coder->sequence = SEQ_FLUSH; @@ -642,7 +762,12 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, coder->options->subfilter_mode = LZMA_SUBFILTER_RUN; coder->subfilter.mode = SUB_RUN; + coder->alignment.out_pos += coder->subfilter.flags_size; coder->sequence = SEQ_SUBFILTER_FLAGS; + + // It is safe to fall through because SEQ_SUBFILTER_FLAGS + // uses bufcpy() which doesn't write unless there is output + // space. } // Fall through @@ -681,7 +806,7 @@ subblock_encode(lzma_coder *coder, lzma_allocator *allocator, out, out_pos, out_size, action); while (*out_pos < out_size - && (*in_pos < in_size || action == LZMA_FINISH)) { + && (*in_pos < in_size || action != LZMA_RUN)) { if (!coder->next_finished && coder->temp.pos == coder->temp.size) { coder->temp.pos = 0; @@ -692,7 +817,7 @@ subblock_encode(lzma_coder *coder, lzma_allocator *allocator, coder->temp.buffer, &coder->temp.size, LZMA_BUFFER_SIZE, action); if (ret == LZMA_STREAM_END) { - assert(action == LZMA_FINISH); + assert(action != LZMA_RUN); coder->next_finished = true; } else if (coder->temp.size == 0 || ret != LZMA_OK) { return ret; @@ -704,7 +829,7 @@ subblock_encode(lzma_coder *coder, lzma_allocator *allocator, coder->temp.size, out, out_pos, out_size, coder->next_finished ? LZMA_FINISH : LZMA_RUN); if (ret == LZMA_STREAM_END) { - assert(action == LZMA_FINISH); + assert(action != LZMA_RUN); assert(coder->next_finished); return LZMA_STREAM_END; } @@ -765,6 +890,7 @@ lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->subblock.size = 0; next->coder->rle.count = 0; next->coder->subfilter.mode = SUB_NONE; + next->coder->subfilter.mode_locked = false; next->coder->temp.pos = 0; next->coder->temp.size = 0; @@ -781,10 +907,13 @@ lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, } next->coder->alignment.multiple = next->coder->options->alignment; + next->coder->subfilter.allow + = next->coder->options->allow_subfilters; subblock_size_limit = next->coder->options->subblock_data_size; } else { next->coder->alignment.multiple = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT; + next->coder->subfilter.allow = false; subblock_size_limit = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT; }