mirror of https://git.tukaani.org/xz.git
liblzma: Use lzma_alloc_zero() in LZ encoder initialization.
This avoids a memzero() call for a newly-allocated memory, which can be expensive when encoding small streams with an over-sized dictionary. To avoid using lzma_alloc_zero() for memory that doesn't need to be zeroed, lzma_mf.son is now allocated separately, which requires handling it separately in normalize() too. Thanks to Vincenzo Innocente for reporting the problem.
This commit is contained in:
parent
28af24e9cf
commit
da1718f266
|
@ -326,25 +326,22 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
|
||||||
hs += HASH_4_SIZE;
|
hs += HASH_4_SIZE;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// If the above code calculating hs is modified, make sure that
|
const uint32_t old_hash_count = mf->hash_count;
|
||||||
// this assertion stays valid (UINT32_MAX / 5 is not strictly the
|
const uint32_t old_sons_count = mf->sons_count;
|
||||||
// exact limit). If it doesn't, you need to calculate that
|
mf->hash_count = hs;
|
||||||
// hash_size_sum + sons_count cannot overflow.
|
|
||||||
assert(hs < UINT32_MAX / 5);
|
|
||||||
|
|
||||||
const uint32_t old_count = mf->hash_size_sum + mf->sons_count;
|
|
||||||
mf->hash_size_sum = hs;
|
|
||||||
mf->sons_count = mf->cyclic_size;
|
mf->sons_count = mf->cyclic_size;
|
||||||
if (is_bt)
|
if (is_bt)
|
||||||
mf->sons_count *= 2;
|
mf->sons_count *= 2;
|
||||||
|
|
||||||
const uint32_t new_count = mf->hash_size_sum + mf->sons_count;
|
|
||||||
|
|
||||||
// Deallocate the old hash array if it exists and has different size
|
// Deallocate the old hash array if it exists and has different size
|
||||||
// than what is needed now.
|
// than what is needed now.
|
||||||
if (old_count != new_count) {
|
if (old_hash_count != mf->hash_count
|
||||||
|
|| old_sons_count != mf->sons_count) {
|
||||||
lzma_free(mf->hash, allocator);
|
lzma_free(mf->hash, allocator);
|
||||||
mf->hash = NULL;
|
mf->hash = NULL;
|
||||||
|
|
||||||
|
lzma_free(mf->son, allocator);
|
||||||
|
mf->son = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Maximum number of match finder cycles
|
// Maximum number of match finder cycles
|
||||||
|
@ -382,43 +379,48 @@ lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator,
|
||||||
mf->write_pos = 0;
|
mf->write_pos = 0;
|
||||||
mf->pending = 0;
|
mf->pending = 0;
|
||||||
|
|
||||||
// Allocate match finder's hash array.
|
|
||||||
const size_t alloc_count = mf->hash_size_sum + mf->sons_count;
|
|
||||||
|
|
||||||
#if UINT32_MAX >= SIZE_MAX / 4
|
#if UINT32_MAX >= SIZE_MAX / 4
|
||||||
// Check for integer overflow. (Huge dictionaries are not
|
// Check for integer overflow. (Huge dictionaries are not
|
||||||
// possible on 32-bit CPU.)
|
// possible on 32-bit CPU.)
|
||||||
if (alloc_count > SIZE_MAX / sizeof(uint32_t))
|
if (mf->hash_count > SIZE_MAX / sizeof(uint32_t)
|
||||||
|
|| mf->sons_count > SIZE_MAX / sizeof(uint32_t))
|
||||||
return true;
|
return true;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Allocate and initialize the hash table. Since EMPTY_HASH_VALUE
|
||||||
|
// is zero, we can use lzma_alloc_zero() or memzero() for mf->hash.
|
||||||
|
//
|
||||||
|
// We don't need to initialize mf->son, but not doing that may
|
||||||
|
// make Valgrind complain in normalization (see normalize() in
|
||||||
|
// lz_encoder_mf.c). Skipping the initialization is *very* good
|
||||||
|
// when big dictionary is used but only small amount of data gets
|
||||||
|
// actually compressed: most of the mf->son won't get actually
|
||||||
|
// allocated by the kernel, so we avoid wasting RAM and improve
|
||||||
|
// initialization speed a lot.
|
||||||
if (mf->hash == NULL) {
|
if (mf->hash == NULL) {
|
||||||
mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t),
|
mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t),
|
||||||
allocator);
|
allocator);
|
||||||
if (mf->hash == NULL)
|
mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t),
|
||||||
|
allocator);
|
||||||
|
|
||||||
|
if (mf->hash == NULL || mf->son == NULL) {
|
||||||
|
lzma_free(mf->hash, allocator);
|
||||||
|
mf->hash = NULL;
|
||||||
|
|
||||||
|
lzma_free(mf->son, allocator);
|
||||||
|
mf->son = NULL;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
mf->son = mf->hash + mf->hash_size_sum;
|
|
||||||
mf->cyclic_pos = 0;
|
|
||||||
|
|
||||||
// Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we
|
|
||||||
// can use memset().
|
|
||||||
/*
|
/*
|
||||||
for (uint32_t i = 0; i < hash_size_sum; ++i)
|
for (uint32_t i = 0; i < mf->hash_count; ++i)
|
||||||
mf->hash[i] = EMPTY_HASH_VALUE;
|
mf->hash[i] = EMPTY_HASH_VALUE;
|
||||||
*/
|
*/
|
||||||
memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t));
|
memzero(mf->hash, mf->hash_count * sizeof(uint32_t));
|
||||||
|
}
|
||||||
|
|
||||||
// We don't need to initialize mf->son, but not doing that will
|
mf->cyclic_pos = 0;
|
||||||
// make Valgrind complain in normalization (see normalize() in
|
|
||||||
// lz_encoder_mf.c).
|
|
||||||
//
|
|
||||||
// Skipping this initialization is *very* good when big dictionary is
|
|
||||||
// used but only small amount of data gets actually compressed: most
|
|
||||||
// of the mf->hash won't get actually allocated by the kernel, so
|
|
||||||
// we avoid wasting RAM and improve initialization speed a lot.
|
|
||||||
//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
|
|
||||||
|
|
||||||
// Handle preset dictionary.
|
// Handle preset dictionary.
|
||||||
if (lz_options->preset_dict != NULL
|
if (lz_options->preset_dict != NULL
|
||||||
|
@ -446,7 +448,8 @@ lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
|
||||||
lzma_mf mf = {
|
lzma_mf mf = {
|
||||||
.buffer = NULL,
|
.buffer = NULL,
|
||||||
.hash = NULL,
|
.hash = NULL,
|
||||||
.hash_size_sum = 0,
|
.son = NULL,
|
||||||
|
.hash_count = 0,
|
||||||
.sons_count = 0,
|
.sons_count = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -455,9 +458,8 @@ lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
|
||||||
return UINT64_MAX;
|
return UINT64_MAX;
|
||||||
|
|
||||||
// Calculate the memory usage.
|
// Calculate the memory usage.
|
||||||
return (uint64_t)(mf.hash_size_sum + mf.sons_count)
|
return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t)
|
||||||
* sizeof(uint32_t)
|
+ mf.size + sizeof(lzma_coder);
|
||||||
+ (uint64_t)(mf.size) + sizeof(lzma_coder);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -466,6 +468,7 @@ lz_encoder_end(lzma_coder *coder, const lzma_allocator *allocator)
|
||||||
{
|
{
|
||||||
lzma_next_end(&coder->next, allocator);
|
lzma_next_end(&coder->next, allocator);
|
||||||
|
|
||||||
|
lzma_free(coder->mf.son, allocator);
|
||||||
lzma_free(coder->mf.hash, allocator);
|
lzma_free(coder->mf.hash, allocator);
|
||||||
lzma_free(coder->mf.buffer, allocator);
|
lzma_free(coder->mf.buffer, allocator);
|
||||||
|
|
||||||
|
@ -523,7 +526,8 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
|
||||||
|
|
||||||
next->coder->mf.buffer = NULL;
|
next->coder->mf.buffer = NULL;
|
||||||
next->coder->mf.hash = NULL;
|
next->coder->mf.hash = NULL;
|
||||||
next->coder->mf.hash_size_sum = 0;
|
next->coder->mf.son = NULL;
|
||||||
|
next->coder->mf.hash_count = 0;
|
||||||
next->coder->mf.sons_count = 0;
|
next->coder->mf.sons_count = 0;
|
||||||
|
|
||||||
next->coder->next = LZMA_NEXT_CODER_INIT;
|
next->coder->next = LZMA_NEXT_CODER_INIT;
|
||||||
|
|
|
@ -119,7 +119,7 @@ struct lzma_mf_s {
|
||||||
lzma_action action;
|
lzma_action action;
|
||||||
|
|
||||||
/// Number of elements in hash[]
|
/// Number of elements in hash[]
|
||||||
uint32_t hash_size_sum;
|
uint32_t hash_count;
|
||||||
|
|
||||||
/// Number of elements in son[]
|
/// Number of elements in son[]
|
||||||
uint32_t sons_count;
|
uint32_t sons_count;
|
||||||
|
|
|
@ -116,24 +116,27 @@ normalize(lzma_mf *mf)
|
||||||
= (MUST_NORMALIZE_POS - mf->cyclic_size);
|
= (MUST_NORMALIZE_POS - mf->cyclic_size);
|
||||||
// & (~(UINT32_C(1) << 10) - 1);
|
// & (~(UINT32_C(1) << 10) - 1);
|
||||||
|
|
||||||
const uint32_t count = mf->hash_size_sum + mf->sons_count;
|
for (uint32_t i = 0; i < mf->hash_count; ++i) {
|
||||||
uint32_t *hash = mf->hash;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
|
||||||
// If the distance is greater than the dictionary size,
|
// If the distance is greater than the dictionary size,
|
||||||
// we can simply mark the hash element as empty.
|
// we can simply mark the hash element as empty.
|
||||||
//
|
if (mf->hash[i] <= subvalue)
|
||||||
// NOTE: Only the first mf->hash_size_sum elements are
|
mf->hash[i] = EMPTY_HASH_VALUE;
|
||||||
// initialized for sure. There may be uninitialized elements
|
|
||||||
// in mf->son. Since we go through both mf->hash and
|
|
||||||
// mf->son here in normalization, Valgrind may complain
|
|
||||||
// that the "if" below depends on uninitialized value. In
|
|
||||||
// this case it is safe to ignore the warning. See also the
|
|
||||||
// comments in lz_encoder_init() in lz_encoder.c.
|
|
||||||
if (hash[i] <= subvalue)
|
|
||||||
hash[i] = EMPTY_HASH_VALUE;
|
|
||||||
else
|
else
|
||||||
hash[i] -= subvalue;
|
mf->hash[i] -= subvalue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < mf->sons_count; ++i) {
|
||||||
|
// Do the same for mf->son.
|
||||||
|
//
|
||||||
|
// NOTE: There may be uninitialized elements in mf->son.
|
||||||
|
// Valgrind may complain that the "if" below depends on
|
||||||
|
// an uninitialized value. In this case it is safe to ignore
|
||||||
|
// the warning. See also the comments in lz_encoder_init()
|
||||||
|
// in lz_encoder.c.
|
||||||
|
if (mf->son[i] <= subvalue)
|
||||||
|
mf->son[i] = EMPTY_HASH_VALUE;
|
||||||
|
else
|
||||||
|
mf->son[i] -= subvalue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update offset to match the new locations.
|
// Update offset to match the new locations.
|
||||||
|
|
Loading…
Reference in New Issue