зеркало из
https://git.tukaani.org/xz.git
synced 2025-07-06 12:26:39 +00:00
Added initial support for preset dictionary for raw LZMA1
and LZMA2. It is not supported by the .xz format or the xz command line tool yet.
Этот коммит содержится в:
родитель
449b8c832b
Коммит
f76e39cf93
@ -210,7 +210,7 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
|||||||
const lzma_filter_info *filters,
|
const lzma_filter_info *filters,
|
||||||
lzma_ret (*lz_init)(lzma_lz_decoder *lz,
|
lzma_ret (*lz_init)(lzma_lz_decoder *lz,
|
||||||
lzma_allocator *allocator, const void *options,
|
lzma_allocator *allocator, const void *options,
|
||||||
size_t *dict_size))
|
lzma_lz_options *lz_options))
|
||||||
{
|
{
|
||||||
// Allocate the base structure if it isn't already allocated.
|
// Allocate the base structure if it isn't already allocated.
|
||||||
if (next->coder == NULL) {
|
if (next->coder == NULL) {
|
||||||
@ -229,17 +229,17 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
|||||||
|
|
||||||
// Allocate and initialize the LZ-based decoder. It will also give
|
// Allocate and initialize the LZ-based decoder. It will also give
|
||||||
// us the dictionary size.
|
// us the dictionary size.
|
||||||
size_t dict_size;
|
lzma_lz_options lz_options;
|
||||||
return_if_error(lz_init(&next->coder->lz, allocator,
|
return_if_error(lz_init(&next->coder->lz, allocator,
|
||||||
filters[0].options, &dict_size));
|
filters[0].options, &lz_options));
|
||||||
|
|
||||||
// If the dictionary size is very small, increase it to 4096 bytes.
|
// If the dictionary size is very small, increase it to 4096 bytes.
|
||||||
// This is to prevent constant wrapping of the dictionary, which
|
// This is to prevent constant wrapping of the dictionary, which
|
||||||
// would slow things down. The downside is that since we don't check
|
// would slow things down. The downside is that since we don't check
|
||||||
// separately for the real dictionary size, we may happily accept
|
// separately for the real dictionary size, we may happily accept
|
||||||
// corrupt files.
|
// corrupt files.
|
||||||
if (dict_size < 4096)
|
if (lz_options.dict_size < 4096)
|
||||||
dict_size = 4096;
|
lz_options.dict_size = 4096;
|
||||||
|
|
||||||
// Make dictionary size a multipe of 16. Some LZ-based decoders like
|
// Make dictionary size a multipe of 16. Some LZ-based decoders like
|
||||||
// LZMA use the lowest bits lzma_dict.pos to know the alignment of the
|
// LZMA use the lowest bits lzma_dict.pos to know the alignment of the
|
||||||
@ -248,23 +248,38 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
|||||||
// recommended to give aligned buffers to liblzma.
|
// recommended to give aligned buffers to liblzma.
|
||||||
//
|
//
|
||||||
// Avoid integer overflow.
|
// Avoid integer overflow.
|
||||||
if (dict_size > SIZE_MAX - 15)
|
if (lz_options.dict_size > SIZE_MAX - 15)
|
||||||
return LZMA_MEM_ERROR;
|
return LZMA_MEM_ERROR;
|
||||||
|
|
||||||
dict_size = (dict_size + 15) & ~((size_t)(15));
|
lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
|
||||||
|
|
||||||
// Allocate and initialize the dictionary.
|
// Allocate and initialize the dictionary.
|
||||||
if (next->coder->dict.size != dict_size) {
|
if (next->coder->dict.size != lz_options.dict_size) {
|
||||||
lzma_free(next->coder->dict.buf, allocator);
|
lzma_free(next->coder->dict.buf, allocator);
|
||||||
next->coder->dict.buf = lzma_alloc(dict_size, allocator);
|
next->coder->dict.buf
|
||||||
|
= lzma_alloc(lz_options.dict_size, allocator);
|
||||||
if (next->coder->dict.buf == NULL)
|
if (next->coder->dict.buf == NULL)
|
||||||
return LZMA_MEM_ERROR;
|
return LZMA_MEM_ERROR;
|
||||||
|
|
||||||
next->coder->dict.size = dict_size;
|
next->coder->dict.size = lz_options.dict_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
lz_decoder_reset(next->coder);
|
lz_decoder_reset(next->coder);
|
||||||
|
|
||||||
|
// Use the preset dictionary if it was given to us.
|
||||||
|
if (lz_options.preset_dict != NULL
|
||||||
|
&& lz_options.preset_dict_size > 0) {
|
||||||
|
// If the preset dictionary is bigger than the actual
|
||||||
|
// dictionary, copy only the tail.
|
||||||
|
const size_t copy_size = MIN(lz_options.preset_dict_size,
|
||||||
|
lz_options.dict_size);
|
||||||
|
const size_t offset = lz_options.preset_dict_size - copy_size;
|
||||||
|
memcpy(next->coder->dict.buf, lz_options.preset_dict + offset,
|
||||||
|
copy_size);
|
||||||
|
next->coder->dict.pos = copy_size;
|
||||||
|
next->coder->dict.full = copy_size;
|
||||||
|
}
|
||||||
|
|
||||||
// Miscellaneous initializations
|
// Miscellaneous initializations
|
||||||
next->coder->next_finished = false;
|
next->coder->next_finished = false;
|
||||||
next->coder->this_finished = false;
|
next->coder->this_finished = false;
|
||||||
|
@ -51,6 +51,13 @@ typedef struct {
|
|||||||
} lzma_dict;
|
} lzma_dict;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
size_t dict_size;
|
||||||
|
const uint8_t *preset_dict;
|
||||||
|
size_t preset_dict_size;
|
||||||
|
} lzma_lz_options;
|
||||||
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
/// Data specific to the LZ-based decoder
|
/// Data specific to the LZ-based decoder
|
||||||
lzma_coder *coder;
|
lzma_coder *coder;
|
||||||
@ -86,7 +93,7 @@ extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
|
|||||||
lzma_allocator *allocator, const lzma_filter_info *filters,
|
lzma_allocator *allocator, const lzma_filter_info *filters,
|
||||||
lzma_ret (*lz_init)(lzma_lz_decoder *lz,
|
lzma_ret (*lz_init)(lzma_lz_decoder *lz,
|
||||||
lzma_allocator *allocator, const void *options,
|
lzma_allocator *allocator, const void *options,
|
||||||
size_t *dict_size));
|
lzma_lz_options *lz_options));
|
||||||
|
|
||||||
extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
|
extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
|
||||||
|
|
||||||
|
@ -363,7 +363,8 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
|
|||||||
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
|
lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
|
||||||
|
const lzma_lz_options *lz_options)
|
||||||
{
|
{
|
||||||
// Allocate the history buffer.
|
// Allocate the history buffer.
|
||||||
if (mf->buffer == NULL) {
|
if (mf->buffer == NULL) {
|
||||||
@ -421,6 +422,19 @@ lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
|
|||||||
// we avoid wasting RAM and improve initialization speed a lot.
|
// we avoid wasting RAM and improve initialization speed a lot.
|
||||||
//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
|
//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
|
||||||
|
|
||||||
|
// Handle preset dictionary.
|
||||||
|
if (lz_options->preset_dict != NULL
|
||||||
|
&& lz_options->preset_dict_size > 0) {
|
||||||
|
// If the preset dictionary is bigger than the actual
|
||||||
|
// dictionary, use only the tail.
|
||||||
|
mf->write_pos = MIN(lz_options->preset_dict_size, mf->size);
|
||||||
|
memcpy(mf->buffer, lz_options->preset_dict
|
||||||
|
+ lz_options->preset_dict_size - mf->write_pos,
|
||||||
|
mf->write_pos);
|
||||||
|
mf->action = LZMA_SYNC_FLUSH;
|
||||||
|
mf->skip(mf, mf->write_pos);
|
||||||
|
}
|
||||||
|
|
||||||
mf->action = LZMA_RUN;
|
mf->action = LZMA_RUN;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@ -509,7 +523,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
|||||||
|
|
||||||
// Allocate new buffers if needed, and do the rest of
|
// Allocate new buffers if needed, and do the rest of
|
||||||
// the initialization.
|
// the initialization.
|
||||||
if (lz_encoder_init(&next->coder->mf, allocator))
|
if (lz_encoder_init(&next->coder->mf, allocator, &lz_options))
|
||||||
return LZMA_MEM_ERROR;
|
return LZMA_MEM_ERROR;
|
||||||
|
|
||||||
// Initialize the next filter in the chain, if any.
|
// Initialize the next filter in the chain, if any.
|
||||||
|
@ -230,7 +230,7 @@ lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
|
|||||||
|
|
||||||
static lzma_ret
|
static lzma_ret
|
||||||
lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
||||||
const void *options, size_t *dict_size)
|
const void *opt, lzma_lz_options *lz_options)
|
||||||
{
|
{
|
||||||
if (lz->coder == NULL) {
|
if (lz->coder == NULL) {
|
||||||
lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
|
lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
|
||||||
@ -243,12 +243,15 @@ lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
|||||||
lz->coder->lzma = LZMA_LZ_DECODER_INIT;
|
lz->coder->lzma = LZMA_LZ_DECODER_INIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const lzma_options_lzma *options = opt;
|
||||||
|
|
||||||
lz->coder->sequence = SEQ_CONTROL;
|
lz->coder->sequence = SEQ_CONTROL;
|
||||||
lz->coder->need_properties = true;
|
lz->coder->need_properties = true;
|
||||||
lz->coder->need_dictionary_reset = true;
|
lz->coder->need_dictionary_reset = options->preset_dict == NULL
|
||||||
|
|| options->preset_dict_size == 0;
|
||||||
|
|
||||||
return lzma_lzma_decoder_create(&lz->coder->lzma,
|
return lzma_lzma_decoder_create(&lz->coder->lzma,
|
||||||
allocator, options, dict_size);
|
allocator, options, lz_options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -318,15 +318,17 @@ lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
|
|||||||
lz->coder->lzma = NULL;
|
lz->coder->lzma = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
lz->coder->sequence = SEQ_INIT;
|
|
||||||
lz->coder->need_properties = true;
|
|
||||||
lz->coder->need_state_reset = false;
|
|
||||||
lz->coder->need_dictionary_reset = true;
|
|
||||||
|
|
||||||
lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
|
lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
|
||||||
lz->coder->opt_new = lz->coder->opt_cur.persistent
|
lz->coder->opt_new = lz->coder->opt_cur.persistent
|
||||||
? options : NULL;
|
? options : NULL;
|
||||||
|
|
||||||
|
lz->coder->sequence = SEQ_INIT;
|
||||||
|
lz->coder->need_properties = true;
|
||||||
|
lz->coder->need_state_reset = false;
|
||||||
|
lz->coder->need_dictionary_reset
|
||||||
|
= lz->coder->opt_cur.preset_dict == NULL
|
||||||
|
|| lz->coder->opt_cur.preset_dict_size == 0;
|
||||||
|
|
||||||
// Initialize LZMA encoder
|
// Initialize LZMA encoder
|
||||||
return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
|
return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
|
||||||
&lz->coder->opt_cur, lz_options));
|
&lz->coder->opt_cur, lz_options));
|
||||||
|
@ -941,7 +941,7 @@ lzma_decoder_reset(lzma_coder *coder, const void *opt)
|
|||||||
|
|
||||||
extern lzma_ret
|
extern lzma_ret
|
||||||
lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
||||||
const void *opt, size_t *dict_size)
|
const void *opt, lzma_lz_options *lz_options)
|
||||||
{
|
{
|
||||||
if (lz->coder == NULL) {
|
if (lz->coder == NULL) {
|
||||||
lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
|
lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
|
||||||
@ -956,7 +956,9 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
|||||||
// All dictionary sizes are OK here. LZ decoder will take care of
|
// All dictionary sizes are OK here. LZ decoder will take care of
|
||||||
// the special cases.
|
// the special cases.
|
||||||
const lzma_options_lzma *options = opt;
|
const lzma_options_lzma *options = opt;
|
||||||
*dict_size = options->dict_size;
|
lz_options->dict_size = options->dict_size;
|
||||||
|
lz_options->preset_dict = options->preset_dict;
|
||||||
|
lz_options->preset_dict_size = options->preset_dict_size;
|
||||||
|
|
||||||
return LZMA_OK;
|
return LZMA_OK;
|
||||||
}
|
}
|
||||||
@ -967,13 +969,13 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
|||||||
/// the LZ initialization).
|
/// the LZ initialization).
|
||||||
static lzma_ret
|
static lzma_ret
|
||||||
lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
||||||
const void *options, size_t *dict_size)
|
const void *options, lzma_lz_options *lz_options)
|
||||||
{
|
{
|
||||||
if (!is_lclppb_valid(options))
|
if (!is_lclppb_valid(options))
|
||||||
return LZMA_PROG_ERROR;
|
return LZMA_PROG_ERROR;
|
||||||
|
|
||||||
return_if_error(lzma_lzma_decoder_create(
|
return_if_error(lzma_lzma_decoder_create(
|
||||||
lz, allocator, options, dict_size));
|
lz, allocator, options, lz_options));
|
||||||
|
|
||||||
lzma_decoder_reset(lz->coder, options);
|
lzma_decoder_reset(lz->coder, options);
|
||||||
lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
|
lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
|
||||||
|
@ -48,7 +48,7 @@ extern bool lzma_lzma_lclppb_decode(
|
|||||||
/// LZMA2 decoders.
|
/// LZMA2 decoders.
|
||||||
extern lzma_ret lzma_lzma_decoder_create(
|
extern lzma_ret lzma_lzma_decoder_create(
|
||||||
lzma_lz_decoder *lz, lzma_allocator *allocator,
|
lzma_lz_decoder *lz, lzma_allocator *allocator,
|
||||||
const void *opt, size_t *dict_size);
|
const void *opt, lzma_lz_options *lz_options);
|
||||||
|
|
||||||
/// Gets memory usage without validating lc/lp/pb. This is used by LZMA2
|
/// Gets memory usage without validating lc/lp/pb. This is used by LZMA2
|
||||||
/// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb.
|
/// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb.
|
||||||
|
@ -274,6 +274,8 @@ encode_symbol(lzma_coder *coder, lzma_mf *mf,
|
|||||||
static bool
|
static bool
|
||||||
encode_init(lzma_coder *coder, lzma_mf *mf)
|
encode_init(lzma_coder *coder, lzma_mf *mf)
|
||||||
{
|
{
|
||||||
|
assert(mf_position(mf) == 0);
|
||||||
|
|
||||||
if (mf->read_pos == mf->read_limit) {
|
if (mf->read_pos == mf->read_limit) {
|
||||||
if (mf->action == LZMA_RUN)
|
if (mf->action == LZMA_RUN)
|
||||||
return false; // We cannot do anything.
|
return false; // We cannot do anything.
|
||||||
@ -594,7 +596,12 @@ lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
|
|||||||
return LZMA_OPTIONS_ERROR;
|
return LZMA_OPTIONS_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
coder->is_initialized = false;
|
// We don't need to write the first byte as literal if there is
|
||||||
|
// a non-empty preset dictionary. encode_init() wouldn't even work
|
||||||
|
// if there is a non-empty preset dictionary, because encode_init()
|
||||||
|
// assumes that position is zero and previous byte is also zero.
|
||||||
|
coder->is_initialized = options->preset_dict != NULL
|
||||||
|
&& options->preset_dict_size > 0;
|
||||||
coder->is_flushed = false;
|
coder->is_flushed = false;
|
||||||
|
|
||||||
set_lz_options(lz_options, options);
|
set_lz_options(lz_options, options);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user