1
0
mirror of https://git.tukaani.org/xz.git synced 2025-10-26 11:02:53 +00:00

Disable the memory usage limiter by default.

For several people, the limiter causes bigger problems than
it solves, so it is better to have it disabled by default.
Those who want to have a limiter by default need to enable
it via the environment variable XZ_DEFAULTS.

Support for environment variable XZ_DEFAULTS was added. It is
parsed before XZ_OPT and technically identical with it. The
intended uses differ quite a bit though; see the man page.

The memory usage limit can now be set separately for
compression and decompression using --memlimit-compress and
--memlimit-decompress. To set both at once, -M or --memlimit
can be used. --memory was retained as a legacy alias for
--memlimit for backwards compatibility.

The semantics of --info-memory were changed in a backwards
incompatible way. Compatibility wasn't meaningful due to
changes in the memory usage limiter functionality.

The memory usage limiter info is no longer shown at the
bottom of xz --long-help.

The memory usage limiter support was removed completely from xzdec.

xz's man page was updated to match the above changes. Various
unrelated fixes were also made to the man page.
This commit is contained in:
Lasse Collin 2010-08-07 20:45:18 +03:00
parent 4a45dd4c39
commit 792331bdee
10 changed files with 377 additions and 452 deletions

View File

@ -28,6 +28,32 @@ bool opt_robot = false;
const char *const stdin_filename = "(stdin)";
/// Parse a memory usage limit given on the command line and hand it to
/// hardware_memlimit_set() for compression, decompression, or both.
///
/// \param      name            Option name passed to str_to_uint64() when
///                             the value is an absolute number
/// \param      name_percentage Option name passed to str_to_uint64() when
///                             the value ends with '%'
/// \param      str             The option argument; a trailing '%' is
///                             overwritten with '\0' in place
/// \param      set_compress    Forwarded to hardware_memlimit_set()
/// \param      set_decompress  Forwarded to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
		bool set_compress, bool set_decompress)
{
	const size_t length = strlen(str);
	const bool is_percentage = length > 0 && str[length - 1] == '%';
	uint64_t limit;

	if (!is_percentage) {
		// Absolute value. On 32-bit systems SIZE_MAX would be
		// the more natural upper bound, but UINT64_MAX is kept
		// so that scripts assuming > 4 GiB values don't break.
		limit = str_to_uint64(name, str, 0, UINT64_MAX);
	} else {
		// Strip the '%' so that only the number gets parsed;
		// the accepted range is 1-100.
		str[length - 1] = '\0';
		limit = str_to_uint64(name_percentage, str, 1, 100);
	}

	hardware_memlimit_set(
			limit, set_compress, set_decompress, is_percentage);
}
static void
parse_real(args_info *args, int argc, char **argv)
{
@ -45,6 +71,8 @@ parse_real(args_info *args, int argc, char **argv)
OPT_NO_SPARSE,
OPT_FILES,
OPT_FILES0,
OPT_MEM_COMPRESS,
OPT_MEM_DECOMPRESS,
OPT_NO_ADJUST,
OPT_INFO_MEMORY,
OPT_ROBOT,
@ -75,8 +103,11 @@ parse_real(args_info *args, int argc, char **argv)
// Basic compression settings
{ "format", required_argument, NULL, 'F' },
{ "check", required_argument, NULL, 'C' },
{ "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
{ "memlimit", required_argument, NULL, 'M' },
{ "memory", required_argument, NULL, 'M' }, // Old alias
{ "no-adjust", no_argument, NULL, OPT_NO_ADJUST },
{ "memory", required_argument, NULL, 'M' },
{ "threads", required_argument, NULL, 'T' },
{ "extreme", no_argument, NULL, 'e' },
@ -104,7 +135,7 @@ parse_real(args_info *args, int argc, char **argv)
{ "long-help", no_argument, NULL, 'H' },
{ "version", no_argument, NULL, 'V' },
{ NULL, 0, NULL, 0 }
{ NULL, 0, NULL, 0 }
};
int c;
@ -118,28 +149,25 @@ parse_real(args_info *args, int argc, char **argv)
coder_set_preset(c - '0');
break;
// --memory
case 'M': {
// Support specifying the limit as a percentage of
// installed physical RAM.
size_t len = strlen(optarg);
if (len > 0 && optarg[len - 1] == '%') {
optarg[len - 1] = '\0';
hardware_memlimit_set_percentage(
str_to_uint64(
"memory%", optarg, 1, 100));
} else {
// On 32-bit systems, SIZE_MAX would make more
// sense than UINT64_MAX. But use UINT64_MAX
// still so that scripts that assume > 4 GiB
// values don't break.
hardware_memlimit_set(str_to_uint64(
"memory", optarg,
0, UINT64_MAX));
}
// --memlimit-compress
case OPT_MEM_COMPRESS:
parse_memlimit("memlimit-compress",
"memlimit-compress%", optarg,
true, false);
break;
// --memlimit-decompress
case OPT_MEM_DECOMPRESS:
parse_memlimit("memlimit-decompress",
"memlimit-decompress%", optarg,
false, true);
break;
// --memlimit
case 'M':
parse_memlimit("memlimit", "memlimit%", optarg,
true, true);
break;
}
// --suffix
case 'S':
@ -179,7 +207,7 @@ parse_real(args_info *args, int argc, char **argv)
// --info-memory
case OPT_INFO_MEMORY:
// This doesn't return.
message_memlimit();
hardware_memlimit_show();
// --help
case 'h':
@ -384,9 +412,9 @@ parse_real(args_info *args, int argc, char **argv)
static void
parse_environment(args_info *args, char *argv0)
parse_environment(args_info *args, char *argv0, const char *varname)
{
char *env = getenv("XZ_OPT");
char *env = getenv(varname);
if (env == NULL)
return;
@ -415,8 +443,8 @@ parse_environment(args_info *args, char *argv0)
if (++argc == my_min(
INT_MAX, SIZE_MAX / sizeof(char *)))
message_fatal(_("The environment variable "
"XZ_OPT contains too many "
"arguments"));
"%s contains too many "
"arguments"), varname);
}
}
@ -504,8 +532,9 @@ args_parse(args_info *args, int argc, char **argv)
}
}
// First the flags from environment
parse_environment(args, argv[0]);
// First the flags from the environment
parse_environment(args, argv[0], "XZ_DEFAULTS");
parse_environment(args, argv[0], "XZ_OPT");
// Then from the command line
parse_real(args, argc, argv);

View File

@ -169,7 +169,7 @@ coder_set_compression_settings(void)
// If using --format=raw, we can be decoding. The memusage function
// also validates the filter chain and the options used for the
// filters.
const uint64_t memory_limit = hardware_memlimit_get();
const uint64_t memory_limit = hardware_memlimit_get(opt_mode);
uint64_t memory_usage;
if (opt_mode == MODE_COMPRESS)
memory_usage = lzma_raw_encoder_memusage(filters);
@ -406,12 +406,14 @@ coder_init(file_pair *pair)
case FORMAT_XZ:
ret = lzma_stream_decoder(&strm,
hardware_memlimit_get(), flags);
hardware_memlimit_get(
MODE_DECOMPRESS), flags);
break;
case FORMAT_LZMA:
ret = lzma_alone_decoder(&strm,
hardware_memlimit_get());
hardware_memlimit_get(
MODE_DECOMPRESS));
break;
case FORMAT_RAW:

View File

@ -18,8 +18,11 @@
/// the --threads=NUM command line option.
static uint32_t threadlimit;
/// Memory usage limit
static uint64_t memlimit;
/// Memory usage limit for compression
static uint64_t memlimit_compress;
/// Memory usage limit for decompression
static uint64_t memlimit_decompress;
/// Total amount of physical RAM
static uint64_t total_ram;
@ -49,50 +52,77 @@ hardware_threadlimit_get(void)
extern void
hardware_memlimit_set(uint64_t new_memlimit)
hardware_memlimit_set(uint64_t new_memlimit,
bool set_compress, bool set_decompress, bool is_percentage)
{
if (new_memlimit != 0) {
memlimit = new_memlimit;
} else {
// The default depends on the amount of RAM but so that
// on "low-memory" systems the relative limit is higher
// to make it more likely that files created with "xz -9"
// will still decompress without overriding the limit
// manually.
//
// If 40 % of RAM is 80 MiB or more, use 40 % of RAM as
// the limit.
memlimit = 40 * total_ram / 100;
if (memlimit < UINT64_C(80) * 1024 * 1024) {
// If 80 % of RAM is less than 80 MiB,
// use 80 % of RAM as the limit.
memlimit = 80 * total_ram / 100;
if (memlimit > UINT64_C(80) * 1024 * 1024) {
// Otherwise use 80 MiB as the limit.
memlimit = UINT64_C(80) * 1024 * 1024;
}
}
if (is_percentage) {
assert(new_memlimit > 0);
assert(new_memlimit <= 100);
new_memlimit = (uint32_t)new_memlimit * total_ram / 100;
}
if (set_compress)
memlimit_compress = new_memlimit;
if (set_decompress)
memlimit_decompress = new_memlimit;
return;
}
extern uint64_t
hardware_memlimit_get(enum operation_mode mode)
{
	// MODE_COMPRESS uses the compression limit; every other mode
	// uses the decompression limit.
	uint64_t limit = memlimit_decompress;
	if (mode == MODE_COMPRESS)
		limit = memlimit_compress;

	// A stored value of zero means "use the default", and currently
	// the default is to have no limit at all. This may become more
	// involved once threading is supported: if the user asks for an
	// "optimal" number of threads (which might even become the default
	// mode) and no limit has been set, some default soft limit will be
	// needed to calculate the thread count, because each thread may
	// use a significant amount of memory.
	if (limit == 0)
		return UINT64_MAX;

	return limit;
}
/// Print one human-readable line for hardware_memlimit_show(): the label
/// followed by either the value (in MiB and in bytes) or "Disabled".
static void
memlimit_show(const char *str, uint64_t value)
{
	// Both 0 and UINT64_MAX are treated as "no limit in effect".
	// This might get a bit more complex once there is threading
	// support; see the comment in hardware_memlimit_get().
	if (value != 0 && value != UINT64_MAX) {
		printf("%s %s MiB (%s B)\n", str,
				uint64_to_str(round_up_to_mib(value), 0),
				uint64_to_str(value, 1));
		return;
	}

	printf("%s %s\n", str, _("Disabled"));
}
extern void
hardware_memlimit_set_percentage(uint32_t percentage)
hardware_memlimit_show(void)
{
assert(percentage > 0);
assert(percentage <= 100);
if (opt_robot) {
printf("%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\n", total_ram,
memlimit_compress, memlimit_decompress);
} else {
memlimit_show(_("Total amount of physical memory (RAM): "),
total_ram);
memlimit_show(_("Memory usage limit for compression: "),
memlimit_compress);
memlimit_show(_("Memory usage limit for decompression: "),
memlimit_decompress);
}
memlimit = percentage * total_ram / 100;
return;
}
extern uint64_t
hardware_memlimit_get(void)
{
return memlimit;
tuklib_exit(E_SUCCESS, E_ERROR, message_verbosity_get() != V_SILENT);
}
@ -106,7 +136,7 @@ hardware_init(void)
total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024;
// Set the defaults.
hardware_memlimit_set(0);
hardware_memlimit_set(0, true, true, false);
hardware_threadlimit_set(0);
return;
}

View File

@ -23,13 +23,16 @@ extern void hardware_threadlimit_set(uint32_t threadlimit);
extern uint32_t hardware_threadlimit_get(void);
/// Set custom memory usage limit. This is used for both encoding and
/// decoding. Zero indicates resetting the limit back to defaults.
extern void hardware_memlimit_set(uint64_t memlimit);
/// Set the memory usage limit. There are separate limits for compression
/// and decompression (the latter includes also --list), one or both can
/// be set with a single call to this function. Zero indicates resetting
/// the limit back to the defaults. The limit can also be set as a percentage
/// of installed RAM; the percentage must be in the range [1, 100].
extern void hardware_memlimit_set(uint64_t new_memlimit,
bool set_compress, bool set_decompress, bool is_percentage);
/// Set custom memory usage limit as a percentage of installed RAM.
/// The percentage must be in the range [1, 100].
extern void hardware_memlimit_set_percentage(uint32_t percentage);
/// Get the current memory usage limit for compression or decompression.
extern uint64_t hardware_memlimit_get(enum operation_mode mode);
/// Get the current memory usage limit.
extern uint64_t hardware_memlimit_get(void);
/// Display the amount of RAM and memory usage limits and exit.
extern void hardware_memlimit_show(void) lzma_attribute((noreturn));

View File

@ -203,7 +203,7 @@ parse_indexes(xz_file_info *xfi, file_pair *pair)
pos -= index_size;
// See how much memory we can use for decoding this Index.
uint64_t memlimit = hardware_memlimit_get();
uint64_t memlimit = hardware_memlimit_get(MODE_LIST);
uint64_t memused = 0;
if (combined_index != NULL) {
memused = lzma_index_memused(combined_index);

View File

@ -854,7 +854,7 @@ message_mem_needed(enum message_verbosity v, uint64_t memusage)
// Show the memory usage limit as MiB unless it is less than 1 MiB.
// This way it's easy to notice errors where one has typed
// --memory=123 instead of --memory=123MiB.
uint64_t memlimit = hardware_memlimit_get();
uint64_t memlimit = hardware_memlimit_get(opt_mode);
if (memlimit < (UINT32_C(1) << 20)) {
snprintf(memlimitstr, sizeof(memlimitstr), "%s B",
uint64_to_str(memlimit, 1));
@ -1052,21 +1052,6 @@ message_try_help(void)
}
extern void
message_memlimit(void)
{
if (opt_robot)
printf("%" PRIu64 "\n", hardware_memlimit_get());
else
printf(_("%s MiB (%s bytes)\n"),
uint64_to_str(
round_up_to_mib(hardware_memlimit_get()), 0),
uint64_to_str(hardware_memlimit_get(), 1));
tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT);
}
extern void
message_version(void)
{
@ -1138,12 +1123,16 @@ message_help(bool long_help)
" ratio without increasing memory usage of the decoder"));
if (long_help) {
puts(_( // xgettext:no-c-format
" --memlimit-compress=LIMIT\n"
" --memlimit-decompress=LIMIT\n"
" -M, --memlimit=LIMIT\n"
" set memory usage limit for compression, decompression,\n"
" or both; LIMIT is in bytes, % of RAM, or 0 for defaults"));
puts(_(
" --no-adjust if compression settings exceed the memory usage limit,\n"
" give an error instead of adjusting the settings downwards"));
puts(_( // xgettext:no-c-format
" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n"
" the default setting, which is 40 % of total RAM"));
}
if (long_help) {
@ -1201,7 +1190,8 @@ message_help(bool long_help)
" --robot use machine-parsable messages (useful for scripts)"));
puts("");
puts(_(
" --info-memory display the memory usage limit and exit"));
" --info-memory display the total amount of RAM and the currently active\n"
" memory usage limits, and exit"));
puts(_(
" -h, --help display the short help (lists only the basic options)\n"
" -H, --long-help display this long help and exit"));
@ -1216,15 +1206,6 @@ message_help(bool long_help)
puts(_("\nWith no FILE, or when FILE is -, read standard input.\n"));
if (long_help) {
printf(_(
"On this system and configuration, this program will use a maximum of roughly\n"
"%s MiB RAM and "), uint64_to_str(round_up_to_mib(hardware_memlimit_get()), 0));
printf(N_("one thread.\n\n", "%s threads.\n\n",
hardware_threadlimit_get()),
uint64_to_str(hardware_threadlimit_get(), 0));
}
// TRANSLATORS: This message indicates the bug reporting address
// for this package. Please add _another line_ saying
// "Report translation bugs to <...>\n" with the email or WWW

View File

@ -107,10 +107,6 @@ extern void message_filters_show(
extern void message_try_help(void);
/// Print the memory usage limit and exit.
extern void message_memlimit(void) lzma_attribute((noreturn));
/// Prints the version number to stdout and exits with exit status SUCCESS.
extern void message_version(void) lzma_attribute((noreturn));

View File

@ -5,7 +5,7 @@
.\" This file has been put into the public domain.
.\" You can do whatever you want with this file.
.\"
.TH XZ 1 "2010-07-28" "Tukaani" "XZ Utils"
.TH XZ 1 "2010-08-07" "Tukaani" "XZ Utils"
.SH NAME
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
.SH SYNOPSIS
@ -188,52 +188,56 @@ The memory usage of
.B xz
varies from a few hundred kilobytes to several gigabytes depending on
the compression settings. The settings used when compressing a file
affect also the memory usage of the decompressor. Typically the decompressor
needs only 5\ % to 20\ % of the amount of RAM that the compressor needed when
creating the file. Still, the worst-case memory usage of the decompressor
is several gigabytes.
determine the memory requirements of the decompressor. Typically the
decompressor needs only 5\ % to 20\ % of the amount of memory that the
compressor needed when creating the file. For example, decompressing a
file created with
.B xz \-9
currently requires 65 MiB of memory. Still, it is possible to have
.B .xz
files that need several gigabytes of memory to decompress.
.PP
To prevent uncomfortable surprises caused by huge memory usage,
Especially users of older systems may find the possibility of very large
memory usage annoying. To prevent uncomfortable surprises,
.B xz
has a built-in memory usage limiter. While some operating systems provide
ways to limit the memory usage of processes, relying on it wasn't deemed
to be flexible enough. The default limit depends on the total amount of
physical RAM:
.IP \(bu 3
If 40\ % of RAM is at least 80 MiB, 40\ % of RAM is used as the limit.
.IP \(bu 3
If 80\ % of RAM is less than 80 MiB, 80\ % of RAM is used as the limit.
.IP \(bu 3
Otherwise 80 MiB is used as the limit.
has a built-in memory usage limiter, which is disabled by default.
While some operating systems provide ways to limit the memory usage of
processes, relying on it wasn't deemed to be flexible enough (e.g. using
.BR ulimit (1)
to limit virtual memory tends to cripple
.BR mmap (2)).
.PP
When compressing, if the selected compression settings exceed the memory
usage limit, the settings are automatically adjusted downwards and a notice
about this is displayed. As an exception, if the memory usage limit is
exceeded when compressing with
.B \-\-format=raw
or
.BR \-\-no\-adjust ,
an error is displayed and
The memory usage limiter can be enabled with the command line option
\fB\-\-memlimit=\fIlimit\fR, but often it is more convenient to enable
the limiter by default by setting the environment variable
.BR XZ_DEFAULTS ,
e.g.
.BR XZ_DEFAULTS=\-\-memlimit=150MiB .
It is possible to set the limits separately for compression and decompression
by using \fB\-\-memlimit\-compress=\fIlimit\fR and
\fB\-\-memlimit\-decompress=\fIlimit\fR, respectively.
Using these two options outside
.B XZ_DEFAULTS
is rarely useful, because a single run of
.B xz
will exit with exit status
.BR 1 .
cannot do both compression and decompression and
.BI \-\-memlimit= limit
(or \fB\-M\fR \fIlimit\fR)
is shorter to type on the command line.
.PP
If source
.I file
cannot be decompressed without exceeding the memory usage limit, an error
message is displayed and the file is skipped. Note that compressed files
may contain many blocks, which may have been compressed with different
settings. Typically all blocks will have roughly the same memory requirements,
but it is possible that a block later in the file will exceed the memory usage
limit, and an error about too low memory usage limit gets displayed after some
data has already been decompressed.
.PP
The absolute value of the active memory usage limit can be seen with
.B \-\-info-memory
or near the bottom of the output of
.BR \-\-long\-help .
The default limit can be overridden with
\fB\-\-memory=\fIlimit\fR.
If the specified memory usage limit is exceeded when decompressing,
.B xz
will display an error and decompressing the file will fail.
If the limit is exceeded when compressing,
.B xz
will try to scale the settings down so that the limit is no longer exceeded
(except when using \fB\-\-format=raw\fR or \fB\-\-no\-adjust\fR).
This way the operation won't fail unless the limit is very small. The scaling
of the settings is done in steps that don't match the compression level
presets, e.g. if the limit is only slightly less than the amount required for
.BR "xz \-9" ,
the settings will be scaled down only a little, not all the way down to
.BR "xz \-8" .
.SS Concatenation and padding with .xz files
It is possible to concatenate
.B .xz
@ -363,7 +367,7 @@ doesn't recognize the type of the source file,
.B xz
will copy the source file as is to standard output. This allows using
.B xzcat
.B \--force
.B \-\-force
like
.BR cat (1)
for files that have not been compressed with
@ -380,7 +384,7 @@ can be used to restrict
to decompress only a single file format.
.RE
.TP
.BR \-c ", " \-\-stdout ", " \-\-to-stdout
.BR \-c ", " \-\-stdout ", " \-\-to\-stdout
Write the compressed or decompressed data to standard output instead of
a file. This implies
.BR \-\-keep .
@ -559,12 +563,8 @@ due to speed and memory usage.
The exact compression settings (filter chain) used by each preset may
vary between
.B xz
versions. The settings may also vary between files being compressed, if
.B xz
determines that modified settings will probably give better compression
ratio without significantly affecting compression time or memory usage.
.IP
Because the settings may vary, the memory usage may vary too. The following
versions. Because the settings may vary, the memory usage may vary
slightly too. FIXME The following
table lists the maximum memory usage of each preset level, which won't be
exceeded even in future versions of
.BR xz .
@ -590,12 +590,6 @@ Preset;Compression;Decompression
.TE
.RE
.RE
.IP
When compressing,
.B xz
automatically adjusts the compression settings downwards if
the memory usage limit would be exceeded, so it is safe to specify
a high preset level even on systems that don't have lots of RAM.
.TP
.BR \-\-fast " and " \-\-best
These are somewhat misleading aliases for
@ -619,16 +613,25 @@ of the compressor or decompressor (exception: compressor memory usage may
increase a little with presets \fB\-0\fR ... \fB\-2\fR). The downside is that
the compression time will increase dramatically (it can easily double).
.TP
.BI \-\-memlimit\-compress= limit
Set a memory usage limit for compression. If this option is specified
multiple times, the last one takes effect.
.IP
If the compression settings exceed the
.IR limit ,
.B xz
will adjust the settings downwards so that the limit is no longer exceeded
and display a notice that automatic adjustment was done. Adjustment is never
done when compressing with
.B \-\-format=raw
or if
.B \-\-no\-adjust
Display an error and exit if the compression settings exceed the
memory usage limit. The default is to adjust the settings downwards so
that the memory usage limit is not exceeded. Automatic adjusting is
always disabled when creating raw streams
.RB ( \-\-format=raw ).
.TP
\fB\-M\fR \fIlimit\fR, \fB\-\-memory=\fIlimit
Set the memory usage limit. If this option is specified multiple times,
the last one takes effect. The
has been specified. In those cases, an error is displayed and
.B xz
will exit with exit status
.BR 1 .
.IP
The
.I limit
can be specified in multiple ways:
.RS
@ -638,52 +641,80 @@ The
can be an absolute value in bytes. Using an integer suffix like
.B MiB
can be useful. Example:
.B "\-\-memory=80MiB"
.B "\-\-memlimit\-compress=80MiB"
.IP \(bu 3
The
.I limit
can be specified as a percentage of physical RAM. Example:
.B "\-\-memory=70%"
can be specified as a percentage of total physical memory (RAM).
This can be useful especially when setting the
.B XZ_DEFAULTS
environment variable in a shell initialization script that is shared
between different computers. That way the limit is automatically bigger
on systems with more memory. Example:
.B "\-\-memlimit\-compress=70%"
.IP \(bu 3
The
.I limit
can be reset back to its default value by setting it to
.BR 0 .
See the section
.B "Memory usage"
for how the default limit is defined.
.IP \(bu 3
The memory usage limiting can be effectively disabled by setting
This is currently equivalent to setting the
.I limit
to
.BR max .
This isn't recommended. It's usually better to use, for example,
.BR \-\-memory=90% .
.B max
i.e. no memory usage limit. Once multithreading support has been implemented,
there may be a difference between
.B 0
and
.B max
for the multithreaded case, so it is recommended to use
.B 0
instead of
.B max
at least until the details have been decided.
.RE
.IP
The current
.I limit
can be seen near the bottom of the output of the
.B \-\-long-help
option.
See also the section
.BR "Memory usage" .
.TP
.BI \-\-memlimit\-decompress= limit
Set a memory usage limit for decompression. This affects also the
.B \-\-list
mode. If the operation is not possible without exceeding the
.IR limit ,
.B xz
will display an error and decompressing the file will fail. See
.BI \-\-memlimit\-compress= limit
for possible ways to specify the
.IR limit .
.TP
\fB\-M\fR \fIlimit\fR, \fB\-\-memlimit=\fIlimit\fR, \fB\-\-memory=\fIlimit
This is equivalent to specifying \fB\-\-memlimit\-compress=\fIlimit
\fB\-\-memlimit\-decompress=\fIlimit\fR.
.TP
.B \-\-no\-adjust
Display an error and exit if the compression settings exceed the
memory usage limit. The default is to adjust the settings downwards so
that the memory usage limit is not exceeded. Automatic adjusting is
always disabled when creating raw streams
.RB ( \-\-format=raw ).
.TP
\fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads
Specify the maximum number of worker threads to use. The default is
the number of available CPU cores. You can see the current value of
.I threads
near the end of the output of the
.B \-\-long\-help
option.
.IP
The actual number of worker threads can be less than
Specify the number of worker threads to use. The actual number of threads
can be less than
.I threads
if using more threads would exceed the memory usage limit.
In addition to CPU-intensive worker threads,
.B xz
may use a few auxiliary threads, which don't use a lot of CPU time.
.IP
.B "Multithreaded compression and decompression are not implemented yet,"
.B "so this option has no effect for now."
.IP
.B "As of writing (2010-08-07), it hasn't been decided if threads will be"
.B "used by default on multicore systems once support for threading has"
.B "been implemented. Comments are welcome."
The complicating factor is that using many threads will increase the memory
usage dramatically. Note that if multithreading will be the default,
it will be done so that single-threaded and multithreaded modes produce
the same output, so compression ratio won't be significantly affected if
threading will be enabled by default.
.SS Custom compressor filter chains
A custom filter chain allows specifying the compression settings in detail
instead of relying on the settings associated to the preset levels.
@ -1037,7 +1068,8 @@ Currently only simple byte-wise delta calculation is supported. It can
be useful when compressing e.g. uncompressed bitmap images or uncompressed
PCM audio. However, special purpose algorithms may give significantly better
results than Delta + LZMA2. This is true especially with audio, which
compresses faster and better e.g. with FLAC.
compresses faster and better e.g. with
.BR flac (1).
.IP
Supported
.IR options :
@ -1087,18 +1119,17 @@ processed so far.
.IP \(bu 3
Compression or decompression speed. This is measured as the amount of
uncompressed data consumed (compression) or produced (decompression)
per second. It is shown once a few seconds have passed since
per second. It is shown after a few seconds have passed since
.B xz
started processing the file.
.IP \(bu 3
Elapsed time or estimated time remaining.
Elapsed time is displayed in the format M:SS or H:MM:SS.
The estimated remaining time is displayed in a less precise format
which never has colons, for example, 2 min 30 s. The estimate can
be shown only when the size of the input file is known and a couple of
seconds have already passed since
Elapsed time in the format M:SS or H:MM:SS.
.IP \(bu 3
Estimated remaining time is shown only when the size of the input file is
known and a couple of seconds have already passed since
.B xz
started processing the file.
started processing the file. The time is shown in a less precise format which
never has any colons, e.g. 2 min 30 s.
.RE
.IP
When standard error is not a terminal,
@ -1106,11 +1137,11 @@ When standard error is not a terminal,
will make
.B xz
print the filename, compressed size, uncompressed size, compression ratio,
speed, and elapsed time on a single line to standard error after
compressing or decompressing the file. If operating took at least a few
seconds, also the speed and elapsed time are printed. If the operation
didn't finish, for example due to user interruption, also the completion
percentage is printed if the size of the input file is known.
and possibly also the speed and elapsed time on a single line to standard
error after compressing or decompressing the file. The speed and elapsed
time are included only when the operation took at least a few seconds.
If the operation didn't finish, for example due to user interruption, also
the completion percentage is printed if the size of the input file is known.
.TP
.BR \-Q ", " \-\-no\-warn
Don't set the exit status to
@ -1133,12 +1164,11 @@ releases. See the section
.B "ROBOT MODE"
for details.
.TP
.BR \-\-info-memory
Display the current memory usage limit in human-readable format on
a single line, and exit successfully. To see how much RAM
.BR \-\-info\-memory
Display, in human-readable format, how much physical memory (RAM)
.B xz
thinks your system has, use
.BR "\-\-memory=100% \-\-info\-memory" .
thinks the system has and the memory usage limits for compression
and decompression, and exit successfully.
.TP
.BR \-h ", " \-\-help
Display a help message describing the most commonly used options,
@ -1165,7 +1195,7 @@ easier to parse by other programs. Currently
.B \-\-robot
is supported only together with
.BR \-\-version ,
.BR \-\-info-memory ,
.BR \-\-info\-memory ,
and
.BR \-\-list .
It will be supported for normal compression and decompression in the future.
@ -1216,10 +1246,24 @@ and
5.0.0 is
.BR 50000002 .
.SS Memory limit information
.B "xz \-\-robot \-\-info-memory"
prints the current memory usage limit as bytes on a single line.
To get the total amount of installed RAM, use
.BR "xz \-\-robot \-\-memory=100% \-\-info-memory" .
.B "xz \-\-robot \-\-info\-memory"
prints a single line with three tab-separated columns:
.RS
.IP 1. 4
Total amount of physical memory (RAM) as bytes
.IP 2. 4
Memory usage limit for compression as bytes.
A special value of zero indicates the default setting,
which for single-threaded mode is the same as no limit.
.IP 3. 4
Memory usage limit for decompression as bytes.
A special value of zero indicates the default setting,
which for single-threaded mode is the same as no limit.
.RE
.PP
In the future, the output of
.B "xz \-\-robot \-\-info\-memory"
may have more columns, but never more than a single line.
.SS List mode
.B "xz \-\-robot \-\-list"
uses tab-separated output. The first column of every line has a string
@ -1455,16 +1499,52 @@ Something worth a warning occurred, but no actual errors occurred.
Notices (not warnings or errors) printed on standard error don't affect
the exit status.
.SH ENVIRONMENT
.TP
.B XZ_OPT
A space-separated list of options is parsed from
.B XZ_OPT
before parsing the options given on the command line. Note that only
options are parsed from
.BR XZ_OPT ;
all non-options are silently ignored. Parsing is done with
.B xz
parses space-separated lists of options from the environment variables
.B XZ_DEFAULTS
and
.BR XZ_OPT ,
in this order, before parsing the options from the command line. Note that
only options are parsed from the environment variables; all non-options
are silently ignored. Parsing is done with
.BR getopt_long (3)
which is used also for the command line arguments.
.TP
.B XZ_DEFAULTS
User-specific or system-wide default options.
Typically this is set in a shell initialization script to enable
.BR xz 's
memory usage limiter by default. Excluding shell initialization scripts
and similar special cases, scripts must never set or unset
.BR XZ_DEFAULTS .
.TP
.B XZ_OPT
This is for passing options to
.B xz
when it is not possible to set the options directly on the
.B xz
command line. This is the case e.g. when
.B xz
is run by a script or tool, e.g. GNU
.BR tar (1):
.RS
.IP
\fBXZ_OPT=\-2v tar caf foo.tar.xz foo
.RE
.IP
Scripts may use
.B XZ_OPT
e.g. to set script-specific default compression options.
It is still recommended to allow users to override
.B XZ_OPT
if that is reasonable, e.g. in
.BR sh (1)
scripts one may use something like this:
.RS
.IP
\fBXZ_OPT=${XZ_OPT\-"\-7e"}; export XZ_OPT
.RE
.IP
.SH "LZMA UTILS COMPATIBILITY"
The command line syntax of
.B xz
@ -1663,7 +1743,7 @@ XZ Embedded supports BCJ filters, but only with the default start offset.
A mix of compressed and uncompressed files can be decompressed
to standard output with a single command:
.IP
.B "xz -dcf a.txt b.txt.xz c.txt d.txt.xz > abcd.txt"
.B "xz \-dcf a.txt b.txt.xz c.txt d.txt.xz > abcd.txt"
.SS Parallel compression of many files
On GNU and *BSD,
.BR find (1)
@ -1672,7 +1752,8 @@ and
can be used to parallelize compression of many files:
.PP
.IP
.B "find . \-type f \e! \-name '*.xz' \-print0 | xargs \-0r \-P4 \-n16 xz"
.B "find . \-type f \e! \-name '*.xz' \-print0 |"
.B "xargs \-0r \-P4 \-n16 xz \-T1"
.PP
The
.B \-P
@ -1690,11 +1771,19 @@ or even more may be appropriate to reduce the number of
processes that
.BR xargs (1)
will eventually create.
.PP
The option
.B \-T1
for
.B xz
is there to force it to single-threaded mode, because
.BR xargs (1)
is used to control the amount of parallelization.
.SS Robot mode examples
Calculating how many bytes have been saved in total after compressing
multiple files:
.IP
.B "xz --robot --list *.xz | awk '/^totals/{print $5\-$4}'"
.B "xz \-\-robot \-\-list *.xz | awk '/^totals/{print $5\-$4}'"
.SH "SEE ALSO"
.BR xzdec (1),
.BR gzip (1),

View File

@ -4,7 +4,7 @@
.\" This file has been put into the public domain.
.\" You can do whatever you want with this file.
.\"
.TH XZDEC 1 "2010-03-07" "Tukaani" "XZ Utils"
.TH XZDEC 1 "2010-08-07" "Tukaani" "XZ Utils"
.SH NAME
xzdec, lzmadec \- Small .xz and .lzma decompressors
.SH SYNOPSIS
@ -44,8 +44,10 @@ files.
To reduce the size of the executable,
.B xzdec
doesn't support multithreading or localization, and doesn't read options from
.B XZ_DEFAULTS
and
.B XZ_OPT
environment variable.
environment variables.
.B xzdec
doesn't support displaying intermediate progress information: sending
.B SIGINFO
@ -77,45 +79,6 @@ compatibility.
.B xzdec
always writes the decompressed data to standard output.
.TP
\fB\-M\fR \fIlimit\fR, \fB\-\-memory=\fIlimit
Set the memory usage
.IR limit .
If this option is specified multiple times, the last one takes effect. The
.I limit
can be specified in multiple ways:
.RS
.IP \(bu 3
The
.I limit
can be an absolute value in bytes. Using an integer suffix like
.B MiB
can be useful. Example:
.B "\-\-memory=80MiB"
.IP \(bu 3
The
.I limit
can be specified as a percentage of physical RAM. Example:
.B "\-\-memory=70%"
.IP \(bu 3
The
.I limit
can be reset back to its default value by setting it to
.BR 0 .
.IP \(bu 3
The memory usage limiting can be effectively disabled by setting
.I limit
to
.BR max .
This isn't recommended. It's usually better to use, for example,
.BR \-\-memory=90% .
.RE
.IP
The current
.I limit
can be seen near the bottom of the output of the
.B \-\-help
option.
.TP
.BR \-q ", " \-\-quiet
Specifying this once does nothing since
.B xzdec

View File

@ -35,12 +35,6 @@
#endif
/// Number of bytes to use memory at maximum
static uint64_t memlimit;
/// Total amount of physical RAM
static uint64_t total_ram;
/// Error messages are suppressed if this is zero, which is the case when
/// --quiet has been given at least twice.
static unsigned int display_errors = 2;
@ -66,10 +60,6 @@ my_errorf(const char *fmt, ...)
static void lzma_attribute((noreturn))
help(void)
{
// Round up to the next MiB and do it correctly also with UINT64_MAX.
const uint64_t mem_mib = (memlimit >> 20)
+ ((memlimit & ((UINT32_C(1) << 20) - 1)) != 0);
printf(
"Usage: %s [OPTION]... [FILE]...\n"
"Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n"
@ -77,7 +67,6 @@ help(void)
" -c, --stdout (ignored)\n"
" -d, --decompress (ignored)\n"
" -k, --keep (ignored)\n"
" -M, --memory=NUM use NUM bytes of memory at maximum (0 means default)\n"
" -q, --quiet specify *twice* to suppress errors\n"
" -Q, --no-warn (ignored)\n"
" -h, --help display this help and exit\n"
@ -85,11 +74,9 @@ help(void)
"\n"
"With no FILE, or when FILE is -, read standard input.\n"
"\n"
"On this system and configuration, this program will use a maximum of roughly\n"
"%" PRIu64 " MiB RAM.\n"
"\n"
"Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname, mem_mib);
PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname);
tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
}
@ -104,126 +91,6 @@ version(void)
}
/// Set the memory usage limit to the given percentage of total_ram.
///
/// \param      percentage  Share of total_ram to allow, in percent.
///                         This function does not validate the range.
static void
memlimit_set_percentage(uint32_t percentage)
{
	// total_ram is uint64_t, so the product is computed in 64 bits
	// before the division.
	const uint64_t scaled = percentage * total_ram;
	memlimit = scaled / 100;
}
/// Set the memory usage limit to the given number of bytes.
///
/// \param      new_memlimit    New limit in bytes. Zero selects the default
///                             limit, which is derived from total_ram:
///                             40 % of RAM, or if that is below 80 MiB,
///                             80 % of RAM capped at 80 MiB.
static void
memlimit_set(uint64_t new_memlimit)
{
	const uint64_t threshold = UINT64_C(80) * 1024 * 1024;

	// An explicit non-zero limit is taken as is.
	if (new_memlimit != 0) {
		memlimit = new_memlimit;
		return;
	}

	// Default: 40 % of RAM when that gives at least 80 MiB.
	memlimit = 40 * total_ram / 100;
	if (memlimit >= threshold)
		return;

	// Otherwise allow up to 80 % of RAM but never more than 80 MiB.
	memlimit = 80 * total_ram / 100;
	if (memlimit > threshold)
		memlimit = threshold;
}
/// Initialize total_ram and set the memory usage limit to its default value.
static void
memlimit_init(void)
{
	// A zero result from lzma_physmem() is treated as "unknown"; in that
	// case fall back to ASSUME_RAM (given in MiB) from the configure
	// script.
	const uint64_t detected = lzma_physmem();
	total_ram = detected != 0
			? detected
			: (uint64_t)(ASSUME_RAM) * 1024 * 1024;

	// Zero requests the default limit.
	memlimit_set(0);
}
/// \brief      Parse a non-negative decimal integer with an optional suffix
///
/// Accepted input is the literal string "max", or a run of decimal digits
/// optionally followed by a size suffix: k/K (2^10), m/M (2^20) or g/G (2^30),
/// each optionally followed by "i", "iB" or "B".
///
/// This is meant only for parsing the memory usage limit, so values that
/// don't fit into uint64_t saturate to UINT64_MAX instead of being rejected.
/// When the digits alone overflow, UINT64_MAX is returned immediately and
/// the rest of the string is not examined.
///
/// On invalid input an error message is printed with my_errorf() and the
/// program exits with EXIT_FAILURE.
///
/// \param      value   String to parse.
/// \param      max     Return value when the string "max" was specified.
///
/// \return     The parsed value, possibly saturated to UINT64_MAX.
static uint64_t
str_to_uint64(const char *value, uint64_t max)
{
	// Special keyword.
	if (strcmp(value, "max") == 0)
		return max;

	// There must be at least one digit.
	if (!(*value >= '0' && *value <= '9')) {
		my_errorf("%s: Value is not a non-negative decimal integer",
				value);
		exit(EXIT_FAILURE);
	}

	// Accumulate the digits, saturating on overflow.
	uint64_t result = 0;
	const char *p = value;
	for (; *p >= '0' && *p <= '9'; ++p) {
		const uint32_t digit = *p - '0';

		if (result > UINT64_MAX / 10)
			return UINT64_MAX;

		result *= 10;

		if (result > UINT64_MAX - digit)
			return UINT64_MAX;

		result += digit;
	}

	// No suffix: done.
	if (*p == '\0')
		return result;

	// The first suffix character selects the multiplier.
	uint64_t multiplier;
	switch (*p) {
	case 'k':
	case 'K':
		multiplier = UINT64_C(1) << 10;
		break;

	case 'm':
	case 'M':
		multiplier = UINT64_C(1) << 20;
		break;

	case 'g':
	case 'G':
		multiplier = UINT64_C(1) << 30;
		break;

	default:
		multiplier = 0;
		break;
	}

	// After the multiplier character only nothing, "i", "iB" or "B"
	// is allowed (e.g. Ki, KiB, and KB).
	const char *tail = p + 1;
	if (multiplier == 0
			|| (*tail != '\0'
				&& strcmp(tail, "i") != 0
				&& strcmp(tail, "iB") != 0
				&& strcmp(tail, "B") != 0)) {
		my_errorf("%s: Invalid suffix", p);
		exit(EXIT_FAILURE);
	}

	// Apply the multiplier, saturating instead of overflowing.
	return result > UINT64_MAX / multiplier
			? UINT64_MAX
			: result * multiplier;
}
/// Parses command line options.
static void
parse_options(int argc, char **argv)
@ -235,7 +102,6 @@ parse_options(int argc, char **argv)
{ "decompress", no_argument, NULL, 'd' },
{ "uncompress", no_argument, NULL, 'd' },
{ "keep", no_argument, NULL, 'k' },
{ "memory", required_argument, NULL, 'M' },
{ "quiet", no_argument, NULL, 'q' },
{ "no-warn", no_argument, NULL, 'Q' },
{ "help", no_argument, NULL, 'h' },
@ -254,31 +120,6 @@ parse_options(int argc, char **argv)
case 'Q':
break;
case 'M': {
// Support specifying the limit as a percentage of
// installed physical RAM.
const size_t len = strlen(optarg);
if (len > 0 && optarg[len - 1] == '%') {
// Memory limit is a percentage of total
// installed RAM.
optarg[len - 1] = '\0';
const uint64_t percentage
= str_to_uint64(optarg, 100);
if (percentage < 1 || percentage > 100) {
my_errorf("Percentage must be in "
"the range [1, 100]");
exit(EXIT_FAILURE);
}
memlimit_set_percentage(percentage);
} else {
memlimit_set(str_to_uint64(
optarg, UINT64_MAX));
}
break;
}
case 'q':
if (display_errors > 0)
--display_errors;
@ -307,13 +148,12 @@ uncompress(lzma_stream *strm, FILE *file, const char *filename)
// Initialize the decoder
#ifdef LZMADEC
ret = lzma_alone_decoder(strm, memlimit);
ret = lzma_alone_decoder(strm, UINT64_MAX);
#else
ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED);
ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
#endif
// The only reasonable error here is LZMA_MEM_ERROR.
// FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future?
if (ret != LZMA_OK) {
my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
: "Internal error (bug)");
@ -401,10 +241,6 @@ uncompress(lzma_stream *strm, FILE *file, const char *filename)
msg = strerror(ENOMEM);
break;
case LZMA_MEMLIMIT_ERROR:
msg = "Memory usage limit reached";
break;
case LZMA_FORMAT_ERROR:
msg = "File format not recognized";
break;
@ -440,10 +276,6 @@ main(int argc, char **argv)
// Initialize progname, which will be used in error messages.
tuklib_progname_init(argv);
// Set the default memory usage limit. This is needed before parsing
// the command line arguments.
memlimit_init();
// Parse the command line options.
parse_options(argc, argv);