xz: Add incomplete support for --block-list.

It's broken with threads and also when --block-size is used.
This commit is contained in:
Lasse Collin 2012-07-03 21:16:39 +03:00
parent 972179cdcd
commit 88ccf47205
7 changed files with 151 additions and 10 deletions

View File

@ -54,6 +54,67 @@ parse_memlimit(const char *name, const char *name_percentage, char *str,
} }
/// \brief      Parse the argument of --block-list into opt_block_list
///
/// The argument is a comma-separated list of Block sizes. The list must be
/// non-empty and must not begin with a comma. An empty element (a comma
/// directly following another comma, or a trailing comma) repeats the
/// previous size. A size of 0 is accepted only as the last element and is
/// stored as UINT64_MAX. The resulting array is terminated with 0.
///
/// Any array from an earlier --block-list is freed and replaced.
///
/// \param      str     Option argument. It is modified in place: every
///                     comma is overwritten with '\0'.
///
/// \note       Invalid input is reported with message_fatal(), which is
///             presumed not to return (the code after each call relies
///             on that) -- confirm against its declaration.
static void
parse_block_list(char *str)
{
	// It must be non-empty and not begin with a comma.
	if (str[0] == '\0' || str[0] == ',')
		message_fatal(_("%s: Invalid argument to --block-list"), str);

	// Count the number of comma-separated strings.
	size_t count = 1;
	for (size_t i = 0; str[i] != '\0'; ++i)
		if (str[i] == ',')
			++count;

	// Prevent an unlikely integer overflow. One extra slot is needed
	// for the terminating zero, thus the "- 1".
	if (count > SIZE_MAX / sizeof(uint64_t) - 1)
		message_fatal(_("%s: Too many arguments to --block-list"),
				str);

	// Allocate memory to hold all the sizes specified.
	// If --block-list was specified already, its value is forgotten.
	free(opt_block_list);
	opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));

	for (size_t i = 0; i < count; ++i) {
		// Locate the next comma and replace it with \0.
		char *p = strchr(str, ',');
		if (p != NULL)
			*p = '\0';

		if (str[0] == '\0') {
			// There is no string, that is, a comma follows
			// another comma. Use the previous value.
			//
			// NOTE: We checked earlier that the first char
			// of the whole list cannot be a comma.
			assert(i > 0);
			opt_block_list[i] = opt_block_list[i - 1];
		} else {
			opt_block_list[i] = str_to_uint64("block-list", str,
					0, UINT64_MAX);

			// Zero indicates no more new Blocks.
			if (opt_block_list[i] == 0) {
				if (i + 1 != count)
					message_fatal(_("0 can only be used "
							"as the last element "
							"in --block-list"));

				opt_block_list[i] = UINT64_MAX;
			}
		}

		// Move to the element after the comma. There is no comma
		// after the last element (p == NULL); computing p + 1 in
		// that case would be arithmetic on a null pointer, which
		// is undefined behavior, so str is left untouched then.
		// The loop ends right after that iteration anyway because
		// count is the number of commas plus one.
		if (p != NULL)
			str = p + 1;
	}

	// Terminate the array.
	opt_block_list[count] = 0;
	return;
}
static void static void
parse_real(args_info *args, int argc, char **argv) parse_real(args_info *args, int argc, char **argv)
{ {
@ -73,6 +134,7 @@ parse_real(args_info *args, int argc, char **argv)
OPT_FILES, OPT_FILES,
OPT_FILES0, OPT_FILES0,
OPT_BLOCK_SIZE, OPT_BLOCK_SIZE,
OPT_BLOCK_LIST,
OPT_MEM_COMPRESS, OPT_MEM_COMPRESS,
OPT_MEM_DECOMPRESS, OPT_MEM_DECOMPRESS,
OPT_NO_ADJUST, OPT_NO_ADJUST,
@ -107,6 +169,7 @@ parse_real(args_info *args, int argc, char **argv)
{ "format", required_argument, NULL, 'F' }, { "format", required_argument, NULL, 'F' },
{ "check", required_argument, NULL, 'C' }, { "check", required_argument, NULL, 'C' },
{ "block-size", required_argument, NULL, OPT_BLOCK_SIZE }, { "block-size", required_argument, NULL, OPT_BLOCK_SIZE },
{ "block-list", required_argument, NULL, OPT_BLOCK_LIST },
{ "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
{ "memlimit", required_argument, NULL, 'M' }, { "memlimit", required_argument, NULL, 'M' },
@ -378,6 +441,11 @@ parse_real(args_info *args, int argc, char **argv)
0, LZMA_VLI_MAX); 0, LZMA_VLI_MAX);
break; break;
case OPT_BLOCK_LIST: {
parse_block_list(optarg);
break;
}
case OPT_SINGLE_STREAM: case OPT_SINGLE_STREAM:
opt_single_stream = true; opt_single_stream = true;
break; break;
@ -590,3 +658,13 @@ args_parse(args_info *args, int argc, char **argv)
return; return;
} }
#ifndef NDEBUG
/// \brief      Free the memory allocated while parsing the arguments
///
/// Releases the array allocated for --block-list. This is compiled in
/// only when NDEBUG is not defined. The pointer is reset to NULL after
/// freeing so that a repeated call, or a later check of opt_block_list
/// against NULL, doesn't touch freed memory.
extern void
args_free(void)
{
	free(opt_block_list);
	opt_block_list = NULL;
	return;
}
#endif

View File

@ -40,3 +40,4 @@ extern bool opt_robot;
extern const char stdin_filename[]; extern const char stdin_filename[];
extern void args_parse(args_info *args, int argc, char **argv); extern void args_parse(args_info *args, int argc, char **argv);
extern void args_free(void);

View File

@ -26,6 +26,7 @@ enum format_type opt_format = FORMAT_AUTO;
bool opt_auto_adjust = true; bool opt_auto_adjust = true;
bool opt_single_stream = false; bool opt_single_stream = false;
uint64_t opt_block_size = 0; uint64_t opt_block_size = 0;
uint64_t *opt_block_list = NULL;
/// Stream used to communicate with liblzma /// Stream used to communicate with liblzma
@ -522,15 +523,36 @@ coder_normal(file_pair *pair)
// Assume that something goes wrong. // Assume that something goes wrong.
bool success = false; bool success = false;
// block_remaining indicates how many input bytes to encode until // block_remaining indicates how many input bytes to encode before
// finishing the current .xz Block. The Block size is set with // finishing the current .xz Block. The Block size is set with
// --block-size=SIZE. It has an effect only when compressing // --block-size=SIZE and --block-list. They have an effect only when
// to the .xz format. If block_remaining == UINT64_MAX, only // compressing to the .xz format. If block_remaining == UINT64_MAX,
// a single block is created. // only a single block is created.
uint64_t block_remaining = UINT64_MAX; uint64_t block_remaining = UINT64_MAX;
if (hardware_threads_get() == 1 && opt_mode == MODE_COMPRESS
&& opt_format == FORMAT_XZ && opt_block_size > 0) // Position in opt_block_list. Unused if --block-list wasn't used.
block_remaining = opt_block_size; size_t list_pos = 0;
// Handle --block-size for single-threaded mode and the first step
// of --block-list.
if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
// --block-size doesn't do anything here in threaded mode,
// because the threaded encoder will take care of splitting
// to fixed-sized Blocks.
if (hardware_threads_get() == 1 && opt_block_size > 0)
block_remaining = opt_block_size;
// If --block-list was used, start with the first size.
//
// FIXME: Currently this overrides --block-size but this isn't
// good. For threaded case, we want --block-size to specify
// how big Blocks the encoder needs to be prepared to create
// at maximum and --block-list will simultaneously cause new
// Blocks to be started at specified intervals. To keep things
// logical, the same should be done in single-threaded mode.
if (opt_block_list != NULL)
block_remaining = opt_block_list[list_pos];
}
strm.next_out = out_buf.u8; strm.next_out = out_buf.u8;
strm.avail_out = IO_BUFFER_SIZE; strm.avail_out = IO_BUFFER_SIZE;
@ -575,7 +597,17 @@ coder_normal(file_pair *pair)
if (ret == LZMA_STREAM_END && action == LZMA_FULL_FLUSH) { if (ret == LZMA_STREAM_END && action == LZMA_FULL_FLUSH) {
// Start a new Block. // Start a new Block.
action = LZMA_RUN; action = LZMA_RUN;
block_remaining = opt_block_size;
if (opt_block_list == NULL) {
block_remaining = opt_block_size;
} else {
// FIXME: Make it work together with
// --block-size.
if (opt_block_list[list_pos + 1] != 0)
++list_pos;
block_remaining = opt_block_list[list_pos];
}
} else if (ret != LZMA_OK) { } else if (ret != LZMA_OK) {
// Determine if the return value indicates that we // Determine if the return value indicates that we

View File

@ -48,6 +48,10 @@ extern bool opt_single_stream;
/// of input. This has an effect only when compressing to the .xz format. /// of input. This has an effect only when compressing to the .xz format.
extern uint64_t opt_block_size; extern uint64_t opt_block_size;
/// This is non-NULL if --block-list was used. This contains the Block sizes
/// as an array that is terminated with 0.
extern uint64_t *opt_block_list;
/// Set the integrity check type used when compressing /// Set the integrity check type used when compressing
extern void coder_set_check(lzma_check check); extern void coder_set_check(lzma_check check);

View File

@ -277,6 +277,7 @@ main(int argc, char **argv)
#ifndef NDEBUG #ifndef NDEBUG
coder_free(); coder_free();
args_free();
#endif #endif
// If we have got a signal, raise it to kill the program instead // If we have got a signal, raise it to kill the program instead

View File

@ -1153,10 +1153,16 @@ message_help(bool long_help)
" does not affect decompressor memory requirements")); " does not affect decompressor memory requirements"));
if (long_help) { if (long_help) {
// FIXME? Mention something about threading?
puts(_( puts(_(
" --block-size=SIZE\n" " --block-size=SIZE\n"
" when compressing to the .xz format, start a new block\n" " when compressing to the .xz format, start a new block\n"
" after every SIZE bytes of input; 0=disabled (default)")); " after every SIZE bytes of input; 0=disabled (default)"));
// FIXME
puts(_(
" --block-list=SIZES\n"
" when compressing to the .xz format, start a new block\n"
" after the given intervals of uncompressed data"));
puts(_( // xgettext:no-c-format puts(_( // xgettext:no-c-format
" --memlimit-compress=LIMIT\n" " --memlimit-compress=LIMIT\n"
" --memlimit-decompress=LIMIT\n" " --memlimit-decompress=LIMIT\n"

View File

@ -5,7 +5,7 @@
.\" This file has been put into the public domain. .\" This file has been put into the public domain.
.\" You can do whatever you want with this file. .\" You can do whatever you want with this file.
.\" .\"
.TH XZ 1 "2012-07-01" "Tukaani" "XZ Utils" .TH XZ 1 "2012-07-03" "Tukaani" "XZ Utils"
. .
.SH NAME .SH NAME
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
@ -807,7 +807,26 @@ format, split the input data into blocks of
.I size .I size
bytes. bytes.
The blocks are compressed independently from each other. The blocks are compressed independently from each other.
.\" FIXME: Explain how to his can be used for random access and threading. .\" FIXME: Explain how to these can be used for random access and threading.
.TP
.BI \-\-block\-list= sizes
When compressing to the
.B .xz
format, start a new block after
the given intervals of uncompressed data.
.IP ""
The uncompressed
.I sizes
of the blocks are specified as a comma-separated list.
Omitting a size (two or more consecutive commas) is a shorthand
to use the size of the previous block.
A special value of
.B 0
may be used as the last value to indicate that
the rest of the file should be encoded as a single block.
.IP ""
.B "Currently this option is badly broken if used together with"
.B "\-\-block\-size or with multithreading."
.TP .TP
.BI \-\-memlimit\-compress= limit .BI \-\-memlimit\-compress= limit
Set a memory usage limit for compression. Set a memory usage limit for compression.