Added initial support for preset dictionary for raw LZMA1

and LZMA2. It is not supported by the .xz format or the xz
command line tool yet.
This commit is contained in:
Lasse Collin 2009-01-27 18:36:05 +02:00
parent 449b8c832b
commit f76e39cf93
8 changed files with 77 additions and 27 deletions

View File

@ -210,7 +210,7 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
const lzma_filter_info *filters,
lzma_ret (*lz_init)(lzma_lz_decoder *lz,
lzma_allocator *allocator, const void *options,
size_t *dict_size))
lzma_lz_options *lz_options))
{
// Allocate the base structure if it isn't already allocated.
if (next->coder == NULL) {
@ -229,17 +229,17 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
// Allocate and initialize the LZ-based decoder. It will also give
// us the dictionary size.
size_t dict_size;
lzma_lz_options lz_options;
return_if_error(lz_init(&next->coder->lz, allocator,
filters[0].options, &dict_size));
filters[0].options, &lz_options));
// If the dictionary size is very small, increase it to 4096 bytes.
// This is to prevent constant wrapping of the dictionary, which
// would slow things down. The downside is that since we don't check
// separately for the real dictionary size, we may happily accept
// corrupt files.
if (dict_size < 4096)
dict_size = 4096;
if (lz_options.dict_size < 4096)
lz_options.dict_size = 4096;
// Make dictionary size a multipe of 16. Some LZ-based decoders like
// LZMA use the lowest bits lzma_dict.pos to know the alignment of the
@ -248,23 +248,38 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
// recommended to give aligned buffers to liblzma.
//
// Avoid integer overflow.
if (dict_size > SIZE_MAX - 15)
if (lz_options.dict_size > SIZE_MAX - 15)
return LZMA_MEM_ERROR;
dict_size = (dict_size + 15) & ~((size_t)(15));
lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
// Allocate and initialize the dictionary.
if (next->coder->dict.size != dict_size) {
if (next->coder->dict.size != lz_options.dict_size) {
lzma_free(next->coder->dict.buf, allocator);
next->coder->dict.buf = lzma_alloc(dict_size, allocator);
next->coder->dict.buf
= lzma_alloc(lz_options.dict_size, allocator);
if (next->coder->dict.buf == NULL)
return LZMA_MEM_ERROR;
next->coder->dict.size = dict_size;
next->coder->dict.size = lz_options.dict_size;
}
lz_decoder_reset(next->coder);
// Use the preset dictionary if it was given to us.
if (lz_options.preset_dict != NULL
&& lz_options.preset_dict_size > 0) {
// If the preset dictionary is bigger than the actual
// dictionary, copy only the tail.
const size_t copy_size = MIN(lz_options.preset_dict_size,
lz_options.dict_size);
const size_t offset = lz_options.preset_dict_size - copy_size;
memcpy(next->coder->dict.buf, lz_options.preset_dict + offset,
copy_size);
next->coder->dict.pos = copy_size;
next->coder->dict.full = copy_size;
}
// Miscellaneous initializations
next->coder->next_finished = false;
next->coder->this_finished = false;

View File

@ -51,6 +51,13 @@ typedef struct {
} lzma_dict;
typedef struct {
size_t dict_size;
const uint8_t *preset_dict;
size_t preset_dict_size;
} lzma_lz_options;
typedef struct {
/// Data specific to the LZ-based decoder
lzma_coder *coder;
@ -86,7 +93,7 @@ extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
lzma_allocator *allocator, const lzma_filter_info *filters,
lzma_ret (*lz_init)(lzma_lz_decoder *lz,
lzma_allocator *allocator, const void *options,
size_t *dict_size));
lzma_lz_options *lz_options));
extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);

View File

@ -363,7 +363,8 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
static bool
lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
const lzma_lz_options *lz_options)
{
// Allocate the history buffer.
if (mf->buffer == NULL) {
@ -421,6 +422,19 @@ lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
// we avoid wasting RAM and improve initialization speed a lot.
//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
// Handle preset dictionary.
if (lz_options->preset_dict != NULL
&& lz_options->preset_dict_size > 0) {
// If the preset dictionary is bigger than the actual
// dictionary, use only the tail.
mf->write_pos = MIN(lz_options->preset_dict_size, mf->size);
memcpy(mf->buffer, lz_options->preset_dict
+ lz_options->preset_dict_size - mf->write_pos,
mf->write_pos);
mf->action = LZMA_SYNC_FLUSH;
mf->skip(mf, mf->write_pos);
}
mf->action = LZMA_RUN;
return false;
@ -509,7 +523,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
// Allocate new buffers if needed, and do the rest of
// the initialization.
if (lz_encoder_init(&next->coder->mf, allocator))
if (lz_encoder_init(&next->coder->mf, allocator, &lz_options))
return LZMA_MEM_ERROR;
// Initialize the next filter in the chain, if any.

View File

@ -230,7 +230,7 @@ lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
static lzma_ret
lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
const void *options, size_t *dict_size)
const void *opt, lzma_lz_options *lz_options)
{
if (lz->coder == NULL) {
lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
@ -243,12 +243,15 @@ lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
lz->coder->lzma = LZMA_LZ_DECODER_INIT;
}
const lzma_options_lzma *options = opt;
lz->coder->sequence = SEQ_CONTROL;
lz->coder->need_properties = true;
lz->coder->need_dictionary_reset = true;
lz->coder->need_dictionary_reset = options->preset_dict == NULL
|| options->preset_dict_size == 0;
return lzma_lzma_decoder_create(&lz->coder->lzma,
allocator, options, dict_size);
allocator, options, lz_options);
}

View File

@ -318,15 +318,17 @@ lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
lz->coder->lzma = NULL;
}
lz->coder->sequence = SEQ_INIT;
lz->coder->need_properties = true;
lz->coder->need_state_reset = false;
lz->coder->need_dictionary_reset = true;
lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
lz->coder->opt_new = lz->coder->opt_cur.persistent
? options : NULL;
lz->coder->sequence = SEQ_INIT;
lz->coder->need_properties = true;
lz->coder->need_state_reset = false;
lz->coder->need_dictionary_reset
= lz->coder->opt_cur.preset_dict == NULL
|| lz->coder->opt_cur.preset_dict_size == 0;
// Initialize LZMA encoder
return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
&lz->coder->opt_cur, lz_options));

View File

@ -941,7 +941,7 @@ lzma_decoder_reset(lzma_coder *coder, const void *opt)
extern lzma_ret
lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
const void *opt, size_t *dict_size)
const void *opt, lzma_lz_options *lz_options)
{
if (lz->coder == NULL) {
lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
@ -956,7 +956,9 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
// All dictionary sizes are OK here. LZ decoder will take care of
// the special cases.
const lzma_options_lzma *options = opt;
*dict_size = options->dict_size;
lz_options->dict_size = options->dict_size;
lz_options->preset_dict = options->preset_dict;
lz_options->preset_dict_size = options->preset_dict_size;
return LZMA_OK;
}
@ -967,13 +969,13 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
/// the LZ initialization).
static lzma_ret
lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
const void *options, size_t *dict_size)
const void *options, lzma_lz_options *lz_options)
{
if (!is_lclppb_valid(options))
return LZMA_PROG_ERROR;
return_if_error(lzma_lzma_decoder_create(
lz, allocator, options, dict_size));
lz, allocator, options, lz_options));
lzma_decoder_reset(lz->coder, options);
lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);

View File

@ -48,7 +48,7 @@ extern bool lzma_lzma_lclppb_decode(
/// LZMA2 decoders.
extern lzma_ret lzma_lzma_decoder_create(
lzma_lz_decoder *lz, lzma_allocator *allocator,
const void *opt, size_t *dict_size);
const void *opt, lzma_lz_options *lz_options);
/// Gets memory usage without validating lc/lp/pb. This is used by LZMA2
/// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb.

View File

@ -274,6 +274,8 @@ encode_symbol(lzma_coder *coder, lzma_mf *mf,
static bool
encode_init(lzma_coder *coder, lzma_mf *mf)
{
assert(mf_position(mf) == 0);
if (mf->read_pos == mf->read_limit) {
if (mf->action == LZMA_RUN)
return false; // We cannot do anything.
@ -594,7 +596,12 @@ lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
return LZMA_OPTIONS_ERROR;
}
coder->is_initialized = false;
// We don't need to write the first byte as literal if there is
// a non-empty preset dictionary. encode_init() wouldn't even work
// if there is a non-empty preset dictionary, because encode_init()
// assumes that position is zero and previous byte is also zero.
coder->is_initialized = options->preset_dict != NULL
&& options->preset_dict_size > 0;
coder->is_flushed = false;
set_lz_options(lz_options, options);