mirror of https://git.tukaani.org/xz.git
xz: Add incomplete support for --block-list.
It's broken with threads and when also --block-size is used.
This commit is contained in:
parent
972179cdcd
commit
88ccf47205
|
@ -54,6 +54,67 @@ parse_memlimit(const char *name, const char *name_percentage, char *str,
|
|||
}
|
||||
|
||||
|
||||
static void
|
||||
parse_block_list(char *str)
|
||||
{
|
||||
// It must be non-empty and not begin with a comma.
|
||||
if (str[0] == '\0' || str[0] == ',')
|
||||
message_fatal(_("%s: Invalid argument to --block-list"), str);
|
||||
|
||||
// Count the number of comma-separated strings.
|
||||
size_t count = 1;
|
||||
for (size_t i = 0; str[i] != '\0'; ++i)
|
||||
if (str[i] == ',')
|
||||
++count;
|
||||
|
||||
// Prevent an unlikely integer overflow.
|
||||
if (count > SIZE_MAX / sizeof(uint64_t) - 1)
|
||||
message_fatal(_("%s: Too many arguments to --block-list"),
|
||||
str);
|
||||
|
||||
// Allocate memory to hold all the sizes specified.
|
||||
// If --block-list was specified already, its value is forgotten.
|
||||
free(opt_block_list);
|
||||
opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
|
||||
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
// Locate the next comma and replace it with \0.
|
||||
char *p = strchr(str, ',');
|
||||
if (p != NULL)
|
||||
*p = '\0';
|
||||
|
||||
if (str[0] == '\0') {
|
||||
// There is no string, that is, a comma follows
|
||||
// another comma. Use the previous value.
|
||||
//
|
||||
// NOTE: We checked earler that the first char
|
||||
// of the whole list cannot be a comma.
|
||||
assert(i > 0);
|
||||
opt_block_list[i] = opt_block_list[i - 1];
|
||||
} else {
|
||||
opt_block_list[i] = str_to_uint64("block-list", str,
|
||||
0, UINT64_MAX);
|
||||
|
||||
// Zero indicates no more new Blocks.
|
||||
if (opt_block_list[i] == 0) {
|
||||
if (i + 1 != count)
|
||||
message_fatal(_("0 can only be used "
|
||||
"as the last element "
|
||||
"in --block-list"));
|
||||
|
||||
opt_block_list[i] = UINT64_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
str = p + 1;
|
||||
}
|
||||
|
||||
// Terminate the array.
|
||||
opt_block_list[count] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
parse_real(args_info *args, int argc, char **argv)
|
||||
{
|
||||
|
@ -73,6 +134,7 @@ parse_real(args_info *args, int argc, char **argv)
|
|||
OPT_FILES,
|
||||
OPT_FILES0,
|
||||
OPT_BLOCK_SIZE,
|
||||
OPT_BLOCK_LIST,
|
||||
OPT_MEM_COMPRESS,
|
||||
OPT_MEM_DECOMPRESS,
|
||||
OPT_NO_ADJUST,
|
||||
|
@ -107,6 +169,7 @@ parse_real(args_info *args, int argc, char **argv)
|
|||
{ "format", required_argument, NULL, 'F' },
|
||||
{ "check", required_argument, NULL, 'C' },
|
||||
{ "block-size", required_argument, NULL, OPT_BLOCK_SIZE },
|
||||
{ "block-list", required_argument, NULL, OPT_BLOCK_LIST },
|
||||
{ "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
|
||||
{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
|
||||
{ "memlimit", required_argument, NULL, 'M' },
|
||||
|
@ -378,6 +441,11 @@ parse_real(args_info *args, int argc, char **argv)
|
|||
0, LZMA_VLI_MAX);
|
||||
break;
|
||||
|
||||
case OPT_BLOCK_LIST: {
|
||||
parse_block_list(optarg);
|
||||
break;
|
||||
}
|
||||
|
||||
case OPT_SINGLE_STREAM:
|
||||
opt_single_stream = true;
|
||||
break;
|
||||
|
@ -590,3 +658,13 @@ args_parse(args_info *args, int argc, char **argv)
|
|||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
#ifndef NDEBUG
|
||||
extern void
|
||||
args_free(void)
|
||||
{
|
||||
free(opt_block_list);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -40,3 +40,4 @@ extern bool opt_robot;
|
|||
extern const char stdin_filename[];
|
||||
|
||||
extern void args_parse(args_info *args, int argc, char **argv);
|
||||
extern void args_free(void);
|
||||
|
|
|
@ -26,6 +26,7 @@ enum format_type opt_format = FORMAT_AUTO;
|
|||
bool opt_auto_adjust = true;
|
||||
bool opt_single_stream = false;
|
||||
uint64_t opt_block_size = 0;
|
||||
uint64_t *opt_block_list = NULL;
|
||||
|
||||
|
||||
/// Stream used to communicate with liblzma
|
||||
|
@ -522,16 +523,37 @@ coder_normal(file_pair *pair)
|
|||
// Assume that something goes wrong.
|
||||
bool success = false;
|
||||
|
||||
// block_remaining indicates how many input bytes to encode until
|
||||
// block_remaining indicates how many input bytes to encode before
|
||||
// finishing the current .xz Block. The Block size is set with
|
||||
// --block-size=SIZE. It has an effect only when compressing
|
||||
// to the .xz format. If block_remaining == UINT64_MAX, only
|
||||
// a single block is created.
|
||||
// --block-size=SIZE and --block-list. They have an effect only when
|
||||
// compressing to the .xz format. If block_remaining == UINT64_MAX,
|
||||
// only a single block is created.
|
||||
uint64_t block_remaining = UINT64_MAX;
|
||||
if (hardware_threads_get() == 1 && opt_mode == MODE_COMPRESS
|
||||
&& opt_format == FORMAT_XZ && opt_block_size > 0)
|
||||
|
||||
// Position in opt_block_list. Unused if --block-list wasn't used.
|
||||
size_t list_pos = 0;
|
||||
|
||||
// Handle --block-size for single-threaded mode and the first step
|
||||
// of --block-list.
|
||||
if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
|
||||
// --block-size doesn't do anything here in threaded mode,
|
||||
// because the threaded encoder will take care of splitting
|
||||
// to fixed-sized Blocks.
|
||||
if (hardware_threads_get() == 1 && opt_block_size > 0)
|
||||
block_remaining = opt_block_size;
|
||||
|
||||
// If --block-list was used, start with the first size.
|
||||
//
|
||||
// FIXME: Currently this overrides --block-size but this isn't
|
||||
// good. For threaded case, we want --block-size to specify
|
||||
// how big Blocks the encoder needs to be prepared to create
|
||||
// at maximum and --block-list will simultaneously cause new
|
||||
// Blocks to be started at specified intervals. To keep things
|
||||
// logical, the same should be done in single-threaded mode.
|
||||
if (opt_block_list != NULL)
|
||||
block_remaining = opt_block_list[list_pos];
|
||||
}
|
||||
|
||||
strm.next_out = out_buf.u8;
|
||||
strm.avail_out = IO_BUFFER_SIZE;
|
||||
|
||||
|
@ -575,7 +597,17 @@ coder_normal(file_pair *pair)
|
|||
if (ret == LZMA_STREAM_END && action == LZMA_FULL_FLUSH) {
|
||||
// Start a new Block.
|
||||
action = LZMA_RUN;
|
||||
|
||||
if (opt_block_list == NULL) {
|
||||
block_remaining = opt_block_size;
|
||||
} else {
|
||||
// FIXME: Make it work together with
|
||||
// --block-size.
|
||||
if (opt_block_list[list_pos + 1] != 0)
|
||||
++list_pos;
|
||||
|
||||
block_remaining = opt_block_list[list_pos];
|
||||
}
|
||||
|
||||
} else if (ret != LZMA_OK) {
|
||||
// Determine if the return value indicates that we
|
||||
|
|
|
@ -48,6 +48,10 @@ extern bool opt_single_stream;
|
|||
/// of input. This has an effect only when compressing to the .xz format.
|
||||
extern uint64_t opt_block_size;
|
||||
|
||||
/// This is non-NULL if --block-list was used. This contains the Block sizes
|
||||
/// as an array that is terminated with 0.
|
||||
extern uint64_t *opt_block_list;
|
||||
|
||||
/// Set the integrity check type used when compressing
|
||||
extern void coder_set_check(lzma_check check);
|
||||
|
||||
|
|
|
@ -277,6 +277,7 @@ main(int argc, char **argv)
|
|||
|
||||
#ifndef NDEBUG
|
||||
coder_free();
|
||||
args_free();
|
||||
#endif
|
||||
|
||||
// If we have got a signal, raise it to kill the program instead
|
||||
|
|
|
@ -1153,10 +1153,16 @@ message_help(bool long_help)
|
|||
" does not affect decompressor memory requirements"));
|
||||
|
||||
if (long_help) {
|
||||
// FIXME? Mention something about threading?
|
||||
puts(_(
|
||||
" --block-size=SIZE\n"
|
||||
" when compressing to the .xz format, start a new block\n"
|
||||
" after every SIZE bytes of input; 0=disabled (default)"));
|
||||
// FIXME
|
||||
puts(_(
|
||||
" --block-list=SIZES\n"
|
||||
" when compressing to the .xz format, start a new block\n"
|
||||
" after the given intervals of uncompressed data"));
|
||||
puts(_( // xgettext:no-c-format
|
||||
" --memlimit-compress=LIMIT\n"
|
||||
" --memlimit-decompress=LIMIT\n"
|
||||
|
|
23
src/xz/xz.1
23
src/xz/xz.1
|
@ -5,7 +5,7 @@
|
|||
.\" This file has been put into the public domain.
|
||||
.\" You can do whatever you want with this file.
|
||||
.\"
|
||||
.TH XZ 1 "2012-07-01" "Tukaani" "XZ Utils"
|
||||
.TH XZ 1 "2012-07-03" "Tukaani" "XZ Utils"
|
||||
.
|
||||
.SH NAME
|
||||
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
|
||||
|
@ -807,7 +807,26 @@ format, split the input data into blocks of
|
|||
.I size
|
||||
bytes.
|
||||
The blocks are compressed independently from each other.
|
||||
.\" FIXME: Explain how to his can be used for random access and threading.
|
||||
.\" FIXME: Explain how to these can be used for random access and threading.
|
||||
.TP
|
||||
.BI \-\-block\-list= sizes
|
||||
When compressing to the
|
||||
.B .xz
|
||||
format, start a new block after
|
||||
the given intervals of uncompressed data.
|
||||
.IP ""
|
||||
The uncompressed
|
||||
.I sizes
|
||||
of the blocks are specified as a comma-separated list.
|
||||
Omitting a size (two or more consecutive commas) is a shorthand
|
||||
to use the size of the previous block.
|
||||
A special value of
|
||||
.B 0
|
||||
may be used as the last value to indicate that
|
||||
the rest of the file should be encoded as a single block.
|
||||
.IP ""
|
||||
.B "Currently this option is badly broken if used together with"
|
||||
.B "\-\-block\-size or with multithreading."
|
||||
.TP
|
||||
.BI \-\-memlimit\-compress= limit
|
||||
Set a memory usage limit for compression.
|
||||
|
|
Loading…
Reference in New Issue