diff --git a/src/xz/args.c b/src/xz/args.c index 54b3ff32..9a4f82be 100644 --- a/src/xz/args.c +++ b/src/xz/args.c @@ -54,6 +54,67 @@ parse_memlimit(const char *name, const char *name_percentage, char *str, } +static void +parse_block_list(char *str) +{ + // It must be non-empty and not begin with a comma. + if (str[0] == '\0' || str[0] == ',') + message_fatal(_("%s: Invalid argument to --block-list"), str); + + // Count the number of comma-separated strings. + size_t count = 1; + for (size_t i = 0; str[i] != '\0'; ++i) + if (str[i] == ',') + ++count; + + // Prevent an unlikely integer overflow. + if (count > SIZE_MAX / sizeof(uint64_t) - 1) + message_fatal(_("%s: Too many arguments to --block-list"), + str); + + // Allocate memory to hold all the sizes specified. + // If --block-list was specified already, its value is forgotten. + free(opt_block_list); + opt_block_list = xmalloc((count + 1) * sizeof(uint64_t)); + + for (size_t i = 0; i < count; ++i) { + // Locate the next comma and replace it with \0. + char *p = strchr(str, ','); + if (p != NULL) + *p = '\0'; + + if (str[0] == '\0') { + // There is no string, that is, a comma follows + // another comma. Use the previous value. + // + // NOTE: We checked earler that the first char + // of the whole list cannot be a comma. + assert(i > 0); + opt_block_list[i] = opt_block_list[i - 1]; + } else { + opt_block_list[i] = str_to_uint64("block-list", str, + 0, UINT64_MAX); + + // Zero indicates no more new Blocks. + if (opt_block_list[i] == 0) { + if (i + 1 != count) + message_fatal(_("0 can only be used " + "as the last element " + "in --block-list")); + + opt_block_list[i] = UINT64_MAX; + } + } + + str = p + 1; + } + + // Terminate the array. + opt_block_list[count] = 0; + return; +} + + static void parse_real(args_info *args, int argc, char **argv) { @@ -73,6 +134,7 @@ parse_real(args_info *args, int argc, char **argv) OPT_FILES, OPT_FILES0, OPT_BLOCK_SIZE, + OPT_BLOCK_LIST, OPT_MEM_COMPRESS, OPT_MEM_DECOMPRESS, OPT_NO_ADJUST, @@ -107,6 +169,7 @@ parse_real(args_info *args, int argc, char **argv) { "format", required_argument, NULL, 'F' }, { "check", required_argument, NULL, 'C' }, { "block-size", required_argument, NULL, OPT_BLOCK_SIZE }, + { "block-list", required_argument, NULL, OPT_BLOCK_LIST }, { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, { "memlimit", required_argument, NULL, 'M' }, @@ -378,6 +441,11 @@ parse_real(args_info *args, int argc, char **argv) 0, LZMA_VLI_MAX); break; + case OPT_BLOCK_LIST: { + parse_block_list(optarg); + break; + } + case OPT_SINGLE_STREAM: opt_single_stream = true; break; @@ -590,3 +658,13 @@ args_parse(args_info *args, int argc, char **argv) return; } + + +#ifndef NDEBUG +extern void +args_free(void) +{ + free(opt_block_list); + return; +} +#endif diff --git a/src/xz/args.h b/src/xz/args.h index b23f4ef1..53c4a98a 100644 --- a/src/xz/args.h +++ b/src/xz/args.h @@ -40,3 +40,4 @@ extern bool opt_robot; extern const char stdin_filename[]; extern void args_parse(args_info *args, int argc, char **argv); +extern void args_free(void); diff --git a/src/xz/coder.c b/src/xz/coder.c index 2ed88cdb..a98be97f 100644 --- a/src/xz/coder.c +++ b/src/xz/coder.c @@ -26,6 +26,7 @@ enum format_type opt_format = FORMAT_AUTO; bool opt_auto_adjust = true; bool opt_single_stream = false; uint64_t opt_block_size = 0; +uint64_t *opt_block_list = NULL; /// Stream used to communicate with liblzma @@ -522,15 +523,36 @@ coder_normal(file_pair *pair) // Assume that something goes wrong. bool success = false; - // block_remaining indicates how many input bytes to encode until + // block_remaining indicates how many input bytes to encode before // finishing the current .xz Block. The Block size is set with - // --block-size=SIZE. It has an effect only when compressing - // to the .xz format. If block_remaining == UINT64_MAX, only - // a single block is created. + // --block-size=SIZE and --block-list. They have an effect only when + // compressing to the .xz format. If block_remaining == UINT64_MAX, + // only a single block is created. uint64_t block_remaining = UINT64_MAX; - if (hardware_threads_get() == 1 && opt_mode == MODE_COMPRESS - && opt_format == FORMAT_XZ && opt_block_size > 0) - block_remaining = opt_block_size; + + // Position in opt_block_list. Unused if --block-list wasn't used. + size_t list_pos = 0; + + // Handle --block-size for single-threaded mode and the first step + // of --block-list. + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) { + // --block-size doesn't do anything here in threaded mode, + // because the threaded encoder will take care of splitting + // to fixed-sized Blocks. + if (hardware_threads_get() == 1 && opt_block_size > 0) + block_remaining = opt_block_size; + + // If --block-list was used, start with the first size. + // + // FIXME: Currently this overrides --block-size but this isn't + // good. For threaded case, we want --block-size to specify + // how big Blocks the encoder needs to be prepared to create + // at maximum and --block-list will simultaneously cause new + // Blocks to be started at specified intervals. To keep things + // logical, the same should be done in single-threaded mode. + if (opt_block_list != NULL) + block_remaining = opt_block_list[list_pos]; + } strm.next_out = out_buf.u8; strm.avail_out = IO_BUFFER_SIZE; @@ -575,7 +597,17 @@ coder_normal(file_pair *pair) if (ret == LZMA_STREAM_END && action == LZMA_FULL_FLUSH) { // Start a new Block. action = LZMA_RUN; - block_remaining = opt_block_size; + + if (opt_block_list == NULL) { + block_remaining = opt_block_size; + } else { + // FIXME: Make it work together with + // --block-size. + if (opt_block_list[list_pos + 1] != 0) + ++list_pos; + + block_remaining = opt_block_list[list_pos]; + } } else if (ret != LZMA_OK) { // Determine if the return value indicates that we diff --git a/src/xz/coder.h b/src/xz/coder.h index 578d2d7e..583da8f6 100644 --- a/src/xz/coder.h +++ b/src/xz/coder.h @@ -48,6 +48,10 @@ extern bool opt_single_stream; /// of input. This has an effect only when compressing to the .xz format. extern uint64_t opt_block_size; +/// This is non-NULL if --block-list was used. This contains the Block sizes +/// as an array that is terminated with 0. +extern uint64_t *opt_block_list; + /// Set the integrity check type used when compressing extern void coder_set_check(lzma_check check); diff --git a/src/xz/main.c b/src/xz/main.c index 4e5b49e5..a8f0683a 100644 --- a/src/xz/main.c +++ b/src/xz/main.c @@ -277,6 +277,7 @@ main(int argc, char **argv) #ifndef NDEBUG coder_free(); + args_free(); #endif // If we have got a signal, raise it to kill the program instead diff --git a/src/xz/message.c b/src/xz/message.c index 2b6ac5f0..abbd1713 100644 --- a/src/xz/message.c +++ b/src/xz/message.c @@ -1153,10 +1153,16 @@ message_help(bool long_help) " does not affect decompressor memory requirements")); if (long_help) { + // FIXME? Mention something about threading? puts(_( " --block-size=SIZE\n" " when compressing to the .xz format, start a new block\n" " after every SIZE bytes of input; 0=disabled (default)")); + // FIXME + puts(_( +" --block-list=SIZES\n" +" when compressing to the .xz format, start a new block\n" +" after the given intervals of uncompressed data")); puts(_( // xgettext:no-c-format " --memlimit-compress=LIMIT\n" " --memlimit-decompress=LIMIT\n" diff --git a/src/xz/xz.1 b/src/xz/xz.1 index 4da09baf..0368f05b 100644 --- a/src/xz/xz.1 +++ b/src/xz/xz.1 @@ -5,7 +5,7 @@ .\" This file has been put into the public domain. .\" You can do whatever you want with this file. .\" -.TH XZ 1 "2012-07-01" "Tukaani" "XZ Utils" +.TH XZ 1 "2012-07-03" "Tukaani" "XZ Utils" . .SH NAME xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files @@ -807,7 +807,26 @@ format, split the input data into blocks of .I size bytes. The blocks are compressed independently from each other. -.\" FIXME: Explain how to his can be used for random access and threading. +.\" FIXME: Explain how to these can be used for random access and threading. +.TP +.BI \-\-block\-list= sizes +When compressing to the +.B .xz +format, start a new block after +the given intervals of uncompressed data. +.IP "" +The uncompressed +.I sizes +of the blocks are specified as a comma-separated list. +Omitting a size (two or more consecutive commas) is a shorthand +to use the size of the previous block. +A special value of +.B 0 +may be used as the last value to indicate that +the rest of the file should be encoded as a single block. +.IP "" +.B "Currently this option is badly broken if used together with" +.B "\-\-block\-size or with multithreading." .TP .BI \-\-memlimit\-compress= limit Set a memory usage limit for compression.