xz: Add incomplete support for --block-list.

It's broken with threads and when also --block-size is used.
This commit is contained in:
Lasse Collin 2012-07-03 21:16:39 +03:00
parent 972179cdcd
commit 88ccf47205
7 changed files with 151 additions and 10 deletions

View File

@ -54,6 +54,67 @@ parse_memlimit(const char *name, const char *name_percentage, char *str,
}
static void
parse_block_list(char *str)
{
// It must be non-empty and not begin with a comma.
if (str[0] == '\0' || str[0] == ',')
message_fatal(_("%s: Invalid argument to --block-list"), str);
// Count the number of comma-separated strings.
size_t count = 1;
for (size_t i = 0; str[i] != '\0'; ++i)
if (str[i] == ',')
++count;
// Prevent an unlikely integer overflow.
if (count > SIZE_MAX / sizeof(uint64_t) - 1)
message_fatal(_("%s: Too many arguments to --block-list"),
str);
// Allocate memory to hold all the sizes specified.
// If --block-list was specified already, its value is forgotten.
free(opt_block_list);
opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
for (size_t i = 0; i < count; ++i) {
// Locate the next comma and replace it with \0.
char *p = strchr(str, ',');
if (p != NULL)
*p = '\0';
if (str[0] == '\0') {
// There is no string, that is, a comma follows
// another comma. Use the previous value.
//
// NOTE: We checked earler that the first char
// of the whole list cannot be a comma.
assert(i > 0);
opt_block_list[i] = opt_block_list[i - 1];
} else {
opt_block_list[i] = str_to_uint64("block-list", str,
0, UINT64_MAX);
// Zero indicates no more new Blocks.
if (opt_block_list[i] == 0) {
if (i + 1 != count)
message_fatal(_("0 can only be used "
"as the last element "
"in --block-list"));
opt_block_list[i] = UINT64_MAX;
}
}
str = p + 1;
}
// Terminate the array.
opt_block_list[count] = 0;
return;
}
static void
parse_real(args_info *args, int argc, char **argv)
{
@ -73,6 +134,7 @@ parse_real(args_info *args, int argc, char **argv)
OPT_FILES,
OPT_FILES0,
OPT_BLOCK_SIZE,
OPT_BLOCK_LIST,
OPT_MEM_COMPRESS,
OPT_MEM_DECOMPRESS,
OPT_NO_ADJUST,
@ -107,6 +169,7 @@ parse_real(args_info *args, int argc, char **argv)
{ "format", required_argument, NULL, 'F' },
{ "check", required_argument, NULL, 'C' },
{ "block-size", required_argument, NULL, OPT_BLOCK_SIZE },
{ "block-list", required_argument, NULL, OPT_BLOCK_LIST },
{ "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
{ "memlimit", required_argument, NULL, 'M' },
@ -378,6 +441,11 @@ parse_real(args_info *args, int argc, char **argv)
0, LZMA_VLI_MAX);
break;
case OPT_BLOCK_LIST: {
parse_block_list(optarg);
break;
}
case OPT_SINGLE_STREAM:
opt_single_stream = true;
break;
@ -590,3 +658,13 @@ args_parse(args_info *args, int argc, char **argv)
return;
}
#ifndef NDEBUG
extern void
args_free(void)
{
free(opt_block_list);
return;
}
#endif

View File

@ -40,3 +40,4 @@ extern bool opt_robot;
extern const char stdin_filename[];
extern void args_parse(args_info *args, int argc, char **argv);
extern void args_free(void);

View File

@ -26,6 +26,7 @@ enum format_type opt_format = FORMAT_AUTO;
bool opt_auto_adjust = true;
bool opt_single_stream = false;
uint64_t opt_block_size = 0;
uint64_t *opt_block_list = NULL;
/// Stream used to communicate with liblzma
@ -522,16 +523,37 @@ coder_normal(file_pair *pair)
// Assume that something goes wrong.
bool success = false;
// block_remaining indicates how many input bytes to encode until
// block_remaining indicates how many input bytes to encode before
// finishing the current .xz Block. The Block size is set with
// --block-size=SIZE. It has an effect only when compressing
// to the .xz format. If block_remaining == UINT64_MAX, only
// a single block is created.
// --block-size=SIZE and --block-list. They have an effect only when
// compressing to the .xz format. If block_remaining == UINT64_MAX,
// only a single block is created.
uint64_t block_remaining = UINT64_MAX;
if (hardware_threads_get() == 1 && opt_mode == MODE_COMPRESS
&& opt_format == FORMAT_XZ && opt_block_size > 0)
// Position in opt_block_list. Unused if --block-list wasn't used.
size_t list_pos = 0;
// Handle --block-size for single-threaded mode and the first step
// of --block-list.
if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
// --block-size doesn't do anything here in threaded mode,
// because the threaded encoder will take care of splitting
// to fixed-sized Blocks.
if (hardware_threads_get() == 1 && opt_block_size > 0)
block_remaining = opt_block_size;
// If --block-list was used, start with the first size.
//
// FIXME: Currently this overrides --block-size but this isn't
// good. For threaded case, we want --block-size to specify
// how big Blocks the encoder needs to be prepared to create
// at maximum and --block-list will simultaneously cause new
// Blocks to be started at specified intervals. To keep things
// logical, the same should be done in single-threaded mode.
if (opt_block_list != NULL)
block_remaining = opt_block_list[list_pos];
}
strm.next_out = out_buf.u8;
strm.avail_out = IO_BUFFER_SIZE;
@ -575,7 +597,17 @@ coder_normal(file_pair *pair)
if (ret == LZMA_STREAM_END && action == LZMA_FULL_FLUSH) {
// Start a new Block.
action = LZMA_RUN;
if (opt_block_list == NULL) {
block_remaining = opt_block_size;
} else {
// FIXME: Make it work together with
// --block-size.
if (opt_block_list[list_pos + 1] != 0)
++list_pos;
block_remaining = opt_block_list[list_pos];
}
} else if (ret != LZMA_OK) {
// Determine if the return value indicates that we

View File

@ -48,6 +48,10 @@ extern bool opt_single_stream;
/// of input. This has an effect only when compressing to the .xz format.
extern uint64_t opt_block_size;
/// This is non-NULL if --block-list was used. This contains the Block sizes
/// as an array that is terminated with 0.
extern uint64_t *opt_block_list;
/// Set the integrity check type used when compressing
extern void coder_set_check(lzma_check check);

View File

@ -277,6 +277,7 @@ main(int argc, char **argv)
#ifndef NDEBUG
coder_free();
args_free();
#endif
// If we have got a signal, raise it to kill the program instead

View File

@ -1153,10 +1153,16 @@ message_help(bool long_help)
" does not affect decompressor memory requirements"));
if (long_help) {
// FIXME? Mention something about threading?
puts(_(
" --block-size=SIZE\n"
" when compressing to the .xz format, start a new block\n"
" after every SIZE bytes of input; 0=disabled (default)"));
// FIXME
puts(_(
" --block-list=SIZES\n"
" when compressing to the .xz format, start a new block\n"
" after the given intervals of uncompressed data"));
puts(_( // xgettext:no-c-format
" --memlimit-compress=LIMIT\n"
" --memlimit-decompress=LIMIT\n"

View File

@ -5,7 +5,7 @@
.\" This file has been put into the public domain.
.\" You can do whatever you want with this file.
.\"
.TH XZ 1 "2012-07-01" "Tukaani" "XZ Utils"
.TH XZ 1 "2012-07-03" "Tukaani" "XZ Utils"
.
.SH NAME
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
@ -807,7 +807,26 @@ format, split the input data into blocks of
.I size
bytes.
The blocks are compressed independently from each other.
.\" FIXME: Explain how to his can be used for random access and threading.
.\" FIXME: Explain how to these can be used for random access and threading.
.TP
.BI \-\-block\-list= sizes
When compressing to the
.B .xz
format, start a new block after
the given intervals of uncompressed data.
.IP ""
The uncompressed
.I sizes
of the blocks are specified as a comma-separated list.
Omitting a size (two or more consecutive commas) is a shorthand
to use the size of the previous block.
A special value of
.B 0
may be used as the last value to indicate that
the rest of the file should be encoded as a single block.
.IP ""
.B "Currently this option is badly broken if used together with"
.B "\-\-block\-size or with multithreading."
.TP
.BI \-\-memlimit\-compress= limit
Set a memory usage limit for compression.