xz/src/liblzma/common/index.c

777 lines
20 KiB
C

///////////////////////////////////////////////////////////////////////////////
//
/// \file index.c
/// \brief Handling of Index
//
// Author: Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include "index.h"
/// Number of Records to allocate at once in the unrolled list. Each
/// lzma_index_group has this many slots in every one of its per-Record
/// arrays.
#define INDEX_GROUP_SIZE 256
typedef struct lzma_index_group_s lzma_index_group;
/// One node of the doubly linked, unrolled list of Records. A node holds
/// up to INDEX_GROUP_SIZE Records, stored as cumulative sums relative to
/// the beginning of the node.
struct lzma_index_group_s {
/// Previous group
lzma_index_group *prev;
/// Next group
lzma_index_group *next;
/// Index of the last Record in this group. The number of Records in
/// use is last + 1.
size_t last;
/// Unpadded Size fields as a special cumulative sum relative to the
/// beginning of the group. It's special in the sense that the previous
/// value is rounded up to the next multiple of four before
/// calculating the new value. The total encoded size of the Blocks
/// in the group is unpadded_sums[last] rounded up to the next
/// multiple of four.
///
/// For example, if the Unpadded Sizes are 39, 57, and 81, the stored
/// values are 39, 97 (40 + 57), and 181 (100 + 81). The total
/// encoded size of these Blocks is 184.
///
/// This encoding is nice from the point of view of lzma_index_locate().
lzma_vli unpadded_sums[INDEX_GROUP_SIZE];
/// Uncompressed Size fields as cumulative sum relative to the
/// beginning of the group. The uncompressed size of the group is
/// uncompressed_sums[last].
lzma_vli uncompressed_sums[INDEX_GROUP_SIZE];
/// True if the Record is padding, that is, a placeholder added by
/// lzma_index_cat() for the Index, Stream Footer, Stream Padding, and
/// Stream Header that sit between the Blocks of two Streams.
bool paddings[INDEX_GROUP_SIZE];
};
/// The Index itself: overall totals, the list of Record groups, the read
/// position, and bookkeeping about Indexes that were concatenated earlier.
struct lzma_index_s {
/// Total size of the Blocks, each one including its padding to
/// a multiple of four bytes
lzma_vli total_size;
/// Uncompressed size of the Stream
lzma_vli uncompressed_size;
/// Number of non-padding records. This is needed for Index encoder.
lzma_vli count;
/// Size of the List of Records field; this is updated every time
/// a new non-padding Record is added.
lzma_vli index_list_size;
/// First group of Records, or NULL if no Record has been added
lzma_index_group *head;
/// Last group of Records, or NULL if no Record has been added
lzma_index_group *tail;
/// Tracking the read position
struct {
/// Group where the current read position is. NULL means that the
/// read position has not been set up yet (see lzma_index_rewind());
/// the other members of this struct are meaningful only when this
/// is non-NULL.
lzma_index_group *group;
/// The most recently read Record in *group
size_t record;
/// Uncompressed offset of the beginning of *group relative
/// to the beginning of the Stream
lzma_vli uncompressed_offset;
/// Compressed offset of the beginning of *group relative
/// to the beginning of the Stream
lzma_vli stream_offset;
} current;
/// Information about earlier Indexes when multiple Indexes have
/// been combined.
struct {
/// Sum of the Record counts of all but the last Stream.
lzma_vli count;
/// Sum of the List of Records fields of all but the last
/// Stream. This is needed when a new Index is concatenated
/// to this lzma_index structure.
lzma_vli index_list_size;
/// Combined size of the Stream Header, Index, and Stream Footer
/// fields of all but the last Stream, plus all Stream Padding
/// fields. The sizes of the Blocks are not included here; the
/// Blocks of every Stream are counted in total_size.
lzma_vli streams_size;
} old;
};
/// Estimate how much memory an Index holding `count` Records needs.
///
/// The estimate is the size of the base lzma_index structure plus one
/// lzma_index_group for every INDEX_GROUP_SIZE Records (rounded up).
///
/// Returns UINT64_MAX if count is not a valid VLI or if the size in bytes
/// would exceed UINT64_MAX.
extern LZMA_API(lzma_vli)
lzma_index_memusage(lzma_vli count)
{
	if (count > LZMA_VLI_MAX)
		return UINT64_MAX;

	// Round up to whole groups without adding anything to count first,
	// so that the rounding itself can never wrap around.
	const lzma_vli groups = count / INDEX_GROUP_SIZE
			+ (count % INDEX_GROUP_SIZE != 0);

	const lzma_vli base_size = sizeof(lzma_index);
	const lzma_vli group_size = sizeof(lzma_index_group);

	// With a huge (but still valid) count the multiplication below
	// would wrap around and report a misleadingly small amount of
	// memory. Report "too much to represent" instead.
	if (groups > (UINT64_MAX - base_size) / group_size)
		return UINT64_MAX;

	return base_size + groups * group_size;
}
/// Free every Record group of *i. The base structure itself is not freed
/// and its head/tail pointers are left as they were; the caller has to
/// either reset them or free *i.
static void
free_index_list(lzma_index *i, lzma_allocator *allocator)
{
	lzma_index_group *following;

	for (lzma_index_group *group = i->head; group != NULL;
			group = following) {
		// Read the link before the node holding it is released.
		following = group->next;
		lzma_free(group, allocator);
	}
}
/// Allocate a new empty Index or reset an existing one.
///
/// If i is NULL, a new lzma_index is allocated with lzma_alloc(). Otherwise
/// the Record groups of *i are freed and the structure is reused.
///
/// Returns the (possibly newly allocated) empty Index, or NULL if the
/// allocation failed.
extern LZMA_API(lzma_index *)
lzma_index_init(lzma_index *i, lzma_allocator *allocator)
{
	if (i != NULL) {
		// Reusing an old structure: drop the Records it holds.
		free_index_list(i, allocator);
	} else {
		i = lzma_alloc(sizeof(lzma_index), allocator);
		if (i == NULL)
			return NULL;
	}

	// No Record groups and no read position.
	i->head = NULL;
	i->tail = NULL;
	i->current.group = NULL;

	// Totals of the (only) Stream.
	i->count = 0;
	i->index_list_size = 0;
	i->total_size = 0;
	i->uncompressed_size = 0;

	// Nothing has been concatenated.
	i->old.count = 0;
	i->old.index_list_size = 0;
	i->old.streams_size = 0;

	return i;
}
/// Free the Index and all of its Record groups. Passing NULL is allowed
/// and does nothing.
extern LZMA_API(void)
lzma_index_end(lzma_index *i, lzma_allocator *allocator)
{
	if (i == NULL)
		return;

	free_index_list(i, allocator);
	lzma_free(i, allocator);
}
/// Get the number of Records in the Index. Padding Records are not
/// included in this count.
extern LZMA_API(lzma_vli)
lzma_index_count(const lzma_index *i)
{
	const lzma_vli records = i->count;
	return records;
}
/// Get the size of the Index field, as calculated by index_size() from
/// the Record count and the size of the List of Records.
extern LZMA_API(lzma_vli)
lzma_index_size(const lzma_index *i)
{
	const lzma_vli records = i->count;
	const lzma_vli list_size = i->index_list_size;

	return index_size(records, list_size);
}
/// Get the total size of the Blocks described by the Index.
extern LZMA_API(lzma_vli)
lzma_index_total_size(const lzma_index *i)
{
	const lzma_vli blocks_size = i->total_size;
	return blocks_size;
}
/// Get the size of a Stream built from this Index:
/// Stream Header + Blocks + Index + Stream Footer.
extern LZMA_API(lzma_vli)
lzma_index_stream_size(const lzma_index *i)
{
	// The same constant is used for both the Stream Header and the
	// Stream Footer.
	const lzma_vli header_and_footer
			= LZMA_STREAM_HEADER_SIZE + LZMA_STREAM_HEADER_SIZE;
	const lzma_vli index_field
			= index_size(i->count, i->index_list_size);

	return header_and_footer + i->total_size + index_field;
}
/// Get the size of the whole file, which may consist of several
/// concatenated Streams.
extern LZMA_API(lzma_vli)
lzma_index_file_size(const lzma_index *i)
{
	// Only the Index of the last Stream has to be sized here. For all
	// the earlier Streams, the Stream Header, Index, and Stream Footer
	// are already part of old.streams_size.
	const lzma_vli last_count = i->count - i->old.count;
	const lzma_vli last_list_size
			= i->index_list_size - i->old.index_list_size;
	const lzma_vli last_index = index_size(last_count, last_list_size);

	// Stream Header and Stream Footer of the last Stream.
	const lzma_vli header_and_footer
			= LZMA_STREAM_HEADER_SIZE + LZMA_STREAM_HEADER_SIZE;

	return i->old.streams_size + header_and_footer
			+ i->total_size + last_index;
}
/// Get the total uncompressed size of the data described by the Index.
extern LZMA_API(lzma_vli)
lzma_index_uncompressed_size(const lzma_index *i)
{
	const lzma_vli plain_size = i->uncompressed_size;
	return plain_size;
}
extern uint32_t
lzma_index_padding_size(const lzma_index *i)
{
return (LZMA_VLI_C(4)
- index_size_unpadded(i->count, i->index_list_size)) & 3;
}
/// Store one more Record at the end of the Record list, allocating a new
/// Record group when the last group is full or when there are no groups
/// yet. Only the list is touched; the totals in *i are the caller's job.
///
/// Returns LZMA_OK on success or LZMA_MEM_ERROR if a new group was needed
/// and could not be allocated.
static lzma_ret
index_append_real(lzma_index *i, lzma_allocator *allocator,
		lzma_vli unpadded_size, lzma_vli uncompressed_size,
		bool is_padding)
{
	lzma_index_group *const tail = i->tail;

	if (tail != NULL && tail->last != INDEX_GROUP_SIZE - 1) {
		// The last group still has a free slot. Continue the
		// cumulative sums from the previous Record.
		const size_t prev = tail->last;
		const size_t slot = prev + 1;

		tail->unpadded_sums[slot] = unpadded_size
				+ vli_ceil4(tail->unpadded_sums[prev]);
		tail->uncompressed_sums[slot]
				= tail->uncompressed_sums[prev]
				+ uncompressed_size;
		tail->paddings[slot] = is_padding;
		tail->last = slot;

		return LZMA_OK;
	}

	// A new group is needed.
	lzma_index_group *fresh = lzma_alloc(sizeof(lzma_index_group),
			allocator);
	if (fresh == NULL)
		return LZMA_MEM_ERROR;

	// The new Record is the first one of the new group, so the sums
	// start from the plain sizes.
	fresh->unpadded_sums[0] = unpadded_size;
	fresh->uncompressed_sums[0] = uncompressed_size;
	fresh->paddings[0] = is_padding;
	fresh->last = 0;

	// Link the group to the end of the list.
	fresh->prev = tail;
	fresh->next = NULL;

	if (i->head == NULL)
		i->head = fresh;
	else
		tail->next = fresh;

	i->tail = fresh;

	return LZMA_OK;
}
/// Add a new Record (one Block) to the end of the Index.
///
/// unpadded_size must be in the range [UNPADDED_SIZE_MIN, UNPADDED_SIZE_MAX]
/// and uncompressed_size must not exceed LZMA_VLI_MAX.
///
/// Returns:
///   - LZMA_OK: The Record was added.
///   - LZMA_PROG_ERROR: i is NULL or one of the sizes is out of range.
///   - LZMA_DATA_ERROR: Adding the Record would make the Index or the
///     file grow past the allowed limits.
///   - LZMA_MEM_ERROR: Allocating a new Record group failed.
///
/// If anything other than LZMA_OK is returned, *i is left unmodified.
extern LZMA_API(lzma_ret)
lzma_index_append(lzma_index *i, lzma_allocator *allocator,
		lzma_vli unpadded_size, lzma_vli uncompressed_size)
{
	// A NULL Index is reported the same way lzma_index_cat() reports
	// it instead of being dereferenced.
	if (i == NULL || unpadded_size < UNPADDED_SIZE_MIN
			|| unpadded_size > UNPADDED_SIZE_MAX
			|| uncompressed_size > LZMA_VLI_MAX)
		return LZMA_PROG_ERROR;

	// This looks a bit ugly. We want to first validate that the Index
	// and Stream stay in valid limits after adding this Record. After
	// validating, we may need to allocate a new lzma_index_group (it's
	// slightly more correct to validate before allocating, YMMV).
	lzma_ret ret;

	// First update the overall info so we can validate it.
	const lzma_vli index_list_size_add = lzma_vli_size(unpadded_size)
			+ lzma_vli_size(uncompressed_size);

	const lzma_vli total_size = vli_ceil4(unpadded_size);

	i->total_size += total_size;
	i->uncompressed_size += uncompressed_size;
	++i->count;
	i->index_list_size += index_list_size_add;

	if (i->total_size > LZMA_VLI_MAX
			|| i->uncompressed_size > LZMA_VLI_MAX
			|| lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX
			|| lzma_index_file_size(i) > LZMA_VLI_MAX)
		ret = LZMA_DATA_ERROR; // Would grow past the limits.
	else
		ret = index_append_real(i, allocator, unpadded_size,
				uncompressed_size, false);

	if (ret != LZMA_OK) {
		// Something went wrong. Undo the updates.
		i->total_size -= total_size;
		i->uncompressed_size -= uncompressed_size;
		--i->count;
		i->index_list_size -= index_list_size_add;
	}

	return ret;
}
/// Initialize i->current to point to the first Record.
///
/// Returns true if there are no Records to read, in which case i->current
/// is left untouched. Returns false when i->current has been set up.
static bool
init_current(lzma_index *i)
{
	if (i->head == NULL) {
		assert(i->count == 0);
		return true;
	}

	// The list can be non-empty and still contain no real Records:
	// lzma_index_cat() always appends a padding Record to dest, so
	// concatenating two empty Indexes leaves a group that holds only
	// padding. There is nothing to read in that case either.
	if (i->count == 0)
		return true;

	i->current.group = i->head;
	i->current.record = 0;

	// The first Block is right after the Stream Header.
	i->current.stream_offset = LZMA_STREAM_HEADER_SIZE;
	i->current.uncompressed_offset = 0;

	return false;
}
/// Move the read position to the last Record of the previous group.
/// The current group must not be the first one.
static void
previous_group(lzma_index *i)
{
	assert(i->current.group->prev != NULL);

	lzma_index_group *const earlier = i->current.group->prev;
	const size_t final_record = earlier->last;

	// The offsets describe the beginning of the current group, so
	// the whole size of the group being entered is taken away.
	i->current.stream_offset
			-= vli_ceil4(earlier->unpadded_sums[final_record]);
	i->current.uncompressed_offset
			-= earlier->uncompressed_sums[final_record];

	i->current.group = earlier;
	i->current.record = final_record;
}
/// Move the read position to the first Record of the next group.
/// The current group must not be the last one.
static void
next_group(lzma_index *i)
{
	lzma_index_group *const here = i->current.group;
	assert(here->next != NULL);

	// The offsets describe the beginning of the current group, so
	// the whole size of the group being left is added to them.
	const size_t final_record = here->last;
	i->current.stream_offset
			+= vli_ceil4(here->unpadded_sums[final_record]);
	i->current.uncompressed_offset
			+= here->uncompressed_sums[final_record];

	i->current.group = here->next;
	i->current.record = 0;
}
/// Fill *info with the sizes and offsets of the Record at the current read
/// position (i->current).
static void
set_info(const lzma_index *i, lzma_index_record *info)
{
	const lzma_index_group *const group = i->current.group;
	const size_t rec = i->current.record;

	// Sums covering the Records that precede this one inside the
	// group. Nothing precedes the first Record. unpadded_sums[] is
	// a bit unusual: the previous value is rounded up to a multiple
	// of four before the next Unpadded Size is added to it, so the
	// same rounding is needed here.
	lzma_vli total_before = 0;
	lzma_vli uncompressed_before = 0;

	if (rec > 0) {
		total_before = vli_ceil4(group->unpadded_sums[rec - 1]);
		uncompressed_before = group->uncompressed_sums[rec - 1];
	}

	// The sums are cumulative, so the sizes of this Record are the
	// differences to the sums of the previous Record.
	info->unpadded_size = group->unpadded_sums[rec] - total_before;
	info->total_size = vli_ceil4(group->unpadded_sums[rec])
			- total_before;
	info->uncompressed_size = group->uncompressed_sums[rec]
			- uncompressed_before;

	// i->current holds the offsets of the beginning of the group.
	// This Record starts after the Records that precede it.
	info->stream_offset = i->current.stream_offset + total_before;
	info->uncompressed_offset = i->current.uncompressed_offset
			+ uncompressed_before;
}
/// Read the next Record from the Index, skipping padding Records.
///
/// Returns false when a Record was found; its information is stored in
/// *info and the read position now points at it. Returns true when there
/// are no more Records; *info is not modified in that case.
extern LZMA_API(lzma_bool)
lzma_index_read(lzma_index *i, lzma_index_record *info)
{
	// Whether the read position has to move before it is examined.
	bool advance = true;

	if (i->current.group == NULL) {
		// We are at the beginning of the Record list. Set up
		// i->current to point at the first Record. Return if
		// there are no Records.
		if (init_current(i))
			return true;

		// The first Record hasn't been returned yet, so it is the
		// candidate. It still has to go through the padding check
		// below: lzma_index_cat() puts a padding Record first when
		// dest has no Records of its own.
		advance = false;
	}

	// Find the next Record that isn't padding.
	while (advance || i->current.group->paddings[i->current.record]) {
		advance = false;

		if (i->current.record < i->current.group->last)
			++i->current.record;
		else if (i->current.group->next == NULL)
			return true;
		else
			next_group(i);
	}

	// We found a new Record. Set the information to *info.
	set_info(i, info);

	return false;
}
/// Forget the read position. The next lzma_index_read() starts again from
/// the beginning of the Record list.
extern LZMA_API(void)
lzma_index_rewind(lzma_index *i)
{
	// A NULL group is the marker for "no read position".
	i->current.group = NULL;
}
/// Find the Record of the Block that contains the uncompressed offset
/// `target`.
///
/// Returns false when the Record was found; its information is stored in
/// *info and the read position is moved to it. Returns true when target is
/// not smaller than the uncompressed size of the Index; *info is then not
/// modified.
extern LZMA_API(lzma_bool)
lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target)
{
	// Offsets at or past the end of the data cannot be located.
	if (target >= i->uncompressed_size)
		return true;

	// From here on an answer exists. Make sure there is a read
	// position to start the search from.
	if (i->current.group == NULL) {
		if (init_current(i))
			return true;
	}

	// Search forward for the group holding the target: stop at the
	// first group whose end is past the target offset.
	while (i->current.uncompressed_offset <= target) {
		const lzma_index_group *const here = i->current.group;
		const lzma_vli group_end = i->current.uncompressed_offset
				+ here->uncompressed_sums[here->last];

		if (group_end > target)
			break;

		next_group(i);
	}

	// The read position may have started past the target. In that
	// case search backward.
	while (i->current.uncompressed_offset > target)
		previous_group(i);

	// The target is now inside i->current.group. The sums stored in
	// a group are relative to the beginning of the group, so the
	// target has to be made relative too.
	assert(target >= i->current.uncompressed_offset);
	const lzma_index_group *const found = i->current.group;
	const lzma_vli wanted = target - i->current.uncompressed_offset;

	// Binary search for the first Record whose uncompressed_sums[]
	// value is greater than the wanted offset. Picking the first such
	// Record skips empty Blocks and padding, which repeat the sum of
	// the Record before them.
	size_t lo = 0;
	size_t hi = found->last;

	while (lo < hi) {
		const size_t mid = lo + (hi - lo) / 2;

		if (found->uncompressed_sums[mid] > wanted)
			hi = mid;
		else
			lo = mid + 1;
	}

	i->current.record = lo;

#ifndef NDEBUG
	// The found Record must not be padding or have zero uncompressed
	// size.
	assert(!found->paddings[lo]);
	const lzma_vli sum_before
			= lo == 0 ? 0 : found->uncompressed_sums[lo - 1];
	assert(found->uncompressed_sums[lo] - sum_before > 0);
#endif

	set_info(i, info);

	return false;
}
extern LZMA_API(lzma_ret)
lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
lzma_allocator *allocator, lzma_vli padding)
{
if (dest == NULL || src == NULL || dest == src
|| padding > LZMA_VLI_MAX)
return LZMA_PROG_ERROR;
// Check that the combined size of the Indexes stays within limits.
{
const lzma_vli dest_size = index_size_unpadded(
dest->count, dest->index_list_size);
const lzma_vli src_size = index_size_unpadded(
src->count, src->index_list_size);
if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX)
return LZMA_DATA_ERROR;
}
// Check that the combined size of the "files" (combined total
// encoded sizes) stays within limits.
{
const lzma_vli dest_size = lzma_index_file_size(dest);
const lzma_vli src_size = lzma_index_file_size(src);
if (dest_size + src_size > LZMA_VLI_MAX
|| dest_size + src_size + padding
> LZMA_VLI_MAX)
return LZMA_DATA_ERROR;
}
// Add a padding Record to take into account the size of
// Index + Stream Footer + Stream Padding + Stream Header.
//
// NOTE: This cannot overflow, because Index Size is always
// far smaller than LZMA_VLI_MAX, and adding two VLIs
// (Index Size and padding) doesn't overflow.
padding += index_size(dest->count - dest->old.count,
dest->index_list_size
- dest->old.index_list_size)
+ LZMA_STREAM_HEADER_SIZE * 2;
// While the above cannot overflow, but it may become an invalid VLI.
if (padding > LZMA_VLI_MAX)
return LZMA_DATA_ERROR;
// Add the padding Record.
{
lzma_ret ret;
// First update the info so we can validate it.
dest->old.streams_size += padding;
if (dest->old.streams_size > LZMA_VLI_MAX
|| lzma_index_file_size(dest) > LZMA_VLI_MAX)
ret = LZMA_DATA_ERROR; // Would grow past the limits.
else
ret = index_append_real(dest, allocator,
padding, 0, true);
// If something went wrong, undo the updated value and return
// the error.
if (ret != LZMA_OK) {
dest->old.streams_size -= padding;
return ret;
}
}
// Avoid wasting lots of memory if src->head has only a few records
// that fit into dest->tail. That is, combine two groups if possible.
//
// NOTE: We know that dest->tail != NULL since we just appended
// a padding Record. But we don't know about src->head.
if (src->head != NULL && src->head->last + 1
<= INDEX_GROUP_SIZE - dest->tail->last - 1) {
// Copy the first Record.
dest->tail->unpadded_sums[dest->tail->last + 1]
= vli_ceil4(dest->tail->unpadded_sums[
dest->tail->last])
+ src->head->unpadded_sums[0];
dest->tail->uncompressed_sums[dest->tail->last + 1]
= dest->tail->uncompressed_sums[dest->tail->last]
+ src->head->uncompressed_sums[0];
dest->tail->paddings[dest->tail->last + 1]
= src->head->paddings[0];
++dest->tail->last;
// Copy the rest.
for (size_t i = 1; i < src->head->last; ++i) {
dest->tail->unpadded_sums[dest->tail->last + 1]
= vli_ceil4(dest->tail->unpadded_sums[
dest->tail->last])
+ src->head->unpadded_sums[i + 1]
- src->head->unpadded_sums[i];
dest->tail->uncompressed_sums[dest->tail->last + 1]
= dest->tail->uncompressed_sums[
dest->tail->last]
+ src->head->uncompressed_sums[i + 1]
- src->head->uncompressed_sums[i];
dest->tail->paddings[dest->tail->last + 1]
= src->head->paddings[i + 1];
++dest->tail->last;
}
// Free the head group of *src. Don't bother updating prev
// pointers since those won't be used for anything before
// we deallocate the whole *src structure.
lzma_index_group *tmp = src->head;
src->head = src->head->next;
lzma_free(tmp, allocator);
}
// If there are groups left in *src, join them as is. Note that if we
// are combining already combined Indexes, src->head can be non-NULL
// even if we just combined the old src->head to dest->tail.
if (src->head != NULL) {
src->head->prev = dest->tail;
dest->tail->next = src->head;
dest->tail = src->tail;
}
// Update information about earlier Indexes. Only the last Index
// from *src won't be counted in dest->old. The last Index is left
// open and can be even appended with lzma_index_append().
dest->old.count = dest->count + src->old.count;
dest->old.index_list_size
= dest->index_list_size + src->old.index_list_size;
dest->old.streams_size += src->old.streams_size;
// Update overall information.
dest->total_size += src->total_size;
dest->uncompressed_size += src->uncompressed_size;
dest->count += src->count;
dest->index_list_size += src->index_list_size;
// *src has nothing left but the base structure.
lzma_free(src, allocator);
return LZMA_OK;
}
/// Make an independent copy of the Index, including all of its Records and
/// its read position.
///
/// Returns the new Index, or NULL if memory allocation failed. Nothing is
/// leaked on failure.
extern LZMA_API(lzma_index *)
lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
{
	lzma_index *copy = lzma_alloc(sizeof(lzma_index), allocator);
	if (copy == NULL)
		return NULL;

	// Take all the plain values from *src. The pointers must not be
	// shared with *src; they are rebuilt while the groups are copied.
	*copy = *src;
	copy->head = NULL;
	copy->tail = NULL;
	copy->current.group = NULL;

	for (const lzma_index_group *from = src->head; from != NULL;
			from = from->next) {
		lzma_index_group *to = lzma_alloc(
				sizeof(lzma_index_group), allocator);
		if (to == NULL) {
			// The list built so far is properly terminated, so
			// the partial copy can be freed the normal way.
			lzma_index_end(copy, allocator);
			return NULL;
		}

		// Link the new group to the end of the copied list.
		to->prev = copy->tail;
		to->next = NULL;

		if (copy->head == NULL)
			copy->head = to;
		else
			copy->tail->next = to;

		copy->tail = to;

		// Copy only the Records that are in use. The rest of the
		// arrays in *from may be uninitialized.
		to->last = from->last;
		const size_t used = from->last + 1;

		memcpy(to->unpadded_sums, from->unpadded_sums,
				sizeof(lzma_vli) * used);
		memcpy(to->uncompressed_sums, from->uncompressed_sums,
				sizeof(lzma_vli) * used);
		memcpy(to->paddings, from->paddings, sizeof(bool) * used);

		// If the read position of *src is in this group, the copy
		// points to the matching new group. The Record number and
		// the offsets were already copied with the base structure.
		if (from == src->current.group)
			copy->current.group = copy->tail;
	}

	return copy;
}
/// Compare two Indexes.
///
/// Returns true if the overall sizes, the Record counts, and every Record
/// group of the two Indexes are identical, and false otherwise. The read
/// positions are not compared.
extern LZMA_API(lzma_bool)
lzma_index_equal(const lzma_index *a, const lzma_index *b)
{
	// An Index is always equal to itself.
	if (a == b)
		return true;

	// The cheap checks first: the overall properties.
	const bool same_totals = a->total_size == b->total_size
			&& a->uncompressed_size == b->uncompressed_size
			&& a->index_list_size == b->index_list_size
			&& a->count == b->count;
	if (!same_totals)
		return false;

	// Then walk both Record lists side by side.
	const lzma_index_group *ga = a->head;
	const lzma_index_group *gb = b->head;

	for (; ga != NULL && gb != NULL; ga = ga->next, gb = gb->next) {
		if (ga->last != gb->last)
			return false;

		// Only the Records in use are compared.
		const size_t used = ga->last + 1;

		if (memcmp(ga->unpadded_sums, gb->unpadded_sums,
				sizeof(lzma_vli) * used) != 0)
			return false;

		if (memcmp(ga->uncompressed_sums, gb->uncompressed_sums,
				sizeof(lzma_vli) * used) != 0)
			return false;

		if (memcmp(ga->paddings, gb->paddings,
				sizeof(bool) * used) != 0)
			return false;
	}

	// Both lists must have ended at the same time.
	return ga == NULL && gb == NULL;
}