liblzma: Add lzma_str_to_filters, _from_filters, and _list_filters.

lzma_str_to_filters() uses static error messages, which makes
them not very precise. However, it also reports the position in
the string where the error occurred, which helps quite a bit if
applications take advantage of it. Dynamic error messages can
be added later with a new flag if it seems important enough.
This commit is contained in:
Lasse Collin 2022-11-28 21:37:48 +02:00
parent 072ebf7b13
commit cedeeca2ea
5 changed files with 1567 additions and 0 deletions

View File

@ -454,3 +454,261 @@ extern LZMA_API(lzma_ret) lzma_filter_flags_decode(
lzma_filter *filter, const lzma_allocator *allocator,
const uint8_t *in, size_t *in_pos, size_t in_size)
lzma_nothrow lzma_attr_warn_unused_result;
/***********
* Strings *
***********/
/**
* \brief Allow or show all filters
*
* By default only the filters supported in the .xz format are accepted by
* lzma_str_to_filters() or shown by lzma_str_list_filters(). With this flag
* the filters that cannot be used in the .xz format (LZMA1) are accepted
* or shown too.
*/
#define LZMA_STR_ALL_FILTERS UINT32_C(0x01)
/**
* \brief Do not validate the filter chain in lzma_str_to_filters()
*
* By default lzma_str_to_filters() can return an error if the filter chain
* as a whole isn't usable in the .xz format or in the raw encoder or decoder.
* The default validation is very basic: for example, it checks that LZMA2
* is only used as the last filter in the chain.
*
* With this flag the validation is skipped (this doesn't affect the handling
* of the individual filter options).
*/
#define LZMA_STR_NO_VALIDATION UINT32_C(0x02)
/**
* \brief Stringify encoder options
*
* Show the filter-specific options that the encoder will use.
* This may be useful for verbose diagnostic messages.
*
* This flag can be passed to lzma_str_from_filters() and
* lzma_str_list_filters().
*
* Note that if options were decoded from .xz headers then the encoder options
* may be undefined. This flag shouldn't be used in such a situation.
*/
#define LZMA_STR_ENCODER UINT32_C(0x10)
/**
* \brief Stringify decoder options
*
* Show the filter-specific options that the decoder will use.
* This may be useful for showing what filter options were decoded
* from file headers.
*
* This flag can be passed to lzma_str_from_filters() and
* lzma_str_list_filters().
*/
#define LZMA_STR_DECODER UINT32_C(0x20)
/**
* \brief Produce xz-compatible getopt_long() syntax
*
* That is, "delta:dist=2 lzma2:dict=4MiB,pb=1,lp=1" becomes
* "--delta=dist=2 --lzma2=dict=4MiB,pb=1,lp=1".
*
* This syntax is compatible with xz 5.0.0 as long as the filters and
* their options are supported too.
*
* This flag can be passed to lzma_str_from_filters() and
* lzma_str_list_filters().
*/
#define LZMA_STR_GETOPT_LONG UINT32_C(0x40)
/**
* \brief Use two dashes "--" instead of a space to separate filters
*
* That is, "delta:dist=2 lzma2:pb=1,lp=1" becomes
* "delta:dist=2--lzma2:pb=1,lp=1". This looks slightly odd but
* strings of this kind should be usable on the command line without quoting.
* However, it is possible that future versions with new filter options
* might produce strings that require shell quoting anyway as the exact
* set of possible characters isn't frozen for now.
*
* It is guaranteed that the single quote (') will never be used in
* filter chain strings (even if LZMA_STR_NO_SPACES isn't used).
*
* This flag can be passed to lzma_str_from_filters().
*/
#define LZMA_STR_NO_SPACES UINT32_C(0x80)
/**
* \brief Convert a string to a filter chain
*
* This tries to make it easier to write applications that allow users
* to set custom compression options. This only handles the filter
* configuration (including presets) but not the number of threads,
* block size, check type, or memory limits.
*
* The input string can be either a preset or a filter chain. Presets
* begin with a digit 0-9 and may be followed by zero or more flags
* which are lower-case letters. Currently only "e" is supported, matching
* LZMA_PRESET_EXTREME. For partial xz command line syntax compatibility,
* a preset string may start with a single dash "-".
*
* A filter chain consists of one or more "filtername:opt1=value1,opt2=value2"
* strings separated by one or more spaces. Leading and trailing spaces are
* ignored. All names and values must be lower-case. Extra commas in the
* option list are ignored. The order of filters is significant: when
* encoding, the uncompressed input data goes to the leftmost filter first.
* Normally "lzma2" is the last filter in the chain.
*
* If one wishes to avoid spaces, for example, to avoid shell quoting,
* it is possible to use two dashes "--" instead of spaces to separate
* the filters.
*
* For xz command line compatibility, each filter may be prefixed with
* two dashes "--" and the colon ":" separating the filter name from
* the options may be replaced with an equals sign "=".
*
* By default, only filters that can be used in the .xz format are accepted.
* To allow all filters (LZMA1) use the flag LZMA_STR_ALL_FILTERS.
*
* By default, very basic validation is done for the filter chain as a whole,
* for example, that LZMA2 is only used as the last filter in the chain.
* The validation isn't perfect though and it's possible that this function
* succeeds but using the filter chain for encoding or decoding will still
* result in LZMA_OPTIONS_ERROR. To disable this validation, use the flag
* LZMA_STR_NO_VALIDATION.
*
* The supported filter names and their options can be listed with
* lzma_str_list_filters(). See the xz man page for the description
* of filter names and options.
*
* \param str User-supplied string describing a preset or
* a filter chain. If a default value is needed and
* you don't know what would be good, use "6" since
* that is the default preset in xz too.
* \param error_pos If this isn't NULL, this value will be set on
* both success and on all errors. This tells the
* location of the error in the string. This is
* an int to make it straightforward to use this
* as printf() field width. The value is guaranteed
* to be in the range [0, INT_MAX] even if strlen(str)
* somehow was greater than INT_MAX. On success this
* is equal to min(strlen(str), INT_MAX).
* \param filters An array of lzma_filter structures. There must
* be LZMA_FILTERS_MAX + 1 (that is, five) elements
* in the array. The old contents are ignored so it
* doesn't need to be initialized. This array is
* modified only if this function returns LZMA_OK.
* Once the allocated filter options are no longer
* needed, lzma_filters_free() can be used to free the
* options (it doesn't free the filters array itself).
* \param flags Bitwise-or of zero or more of the flags
* LZMA_STR_ALL_FILTERS and LZMA_STR_NO_VALIDATION.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
*
* \return On success, NULL is returned. On error, a statically-allocated
* error message is returned which together with the error_pos
* should give some idea what is wrong.
*
* For command line applications, below is an example how an error message
* can be displayed. Note that an empty string is printed using the error
* position as the field width, followed by "^". If "^" was used as the
* printed string instead, it would create an off-by-one error except at
* the very beginning of the line.
*
* \code{.c}
* const char *str = ...; // From user
* lzma_filter filters[LZMA_FILTERS_MAX + 1];
* int pos;
* const char *msg = lzma_str_to_filters(str, &pos, filters, 0, NULL);
* if (msg != NULL) {
*     printf("%s: Error in XZ compression options:\n", argv[0]);
*     printf("%s: %s\n", argv[0], str);
*     printf("%s: %*s^\n", argv[0], pos, "");
*     printf("%s: %s\n", argv[0], msg);
* }
* \endcode
*/
extern LZMA_API(const char *) lzma_str_to_filters(
const char *str, int *error_pos, lzma_filter *filters,
uint32_t flags, const lzma_allocator *allocator)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Convert a filter chain to a string
*
* Use cases:
*
* - Verbose output showing the full encoder options to the user
* (use LZMA_STR_ENCODER in flags)
*
* - Showing the filters and options that are required to decode a file
* (use LZMA_STR_DECODER in flags)
*
* - Showing the filter names without any options in informational messages
* where the technical details aren't important (no flags). In this case
* the .options in the filters array are ignored and may be NULL even if
* a filter has a mandatory options structure.
*
* Note that even if the filter chain was specified using a preset,
* the resulting filter chain isn't reversed to a preset. So if you
* specify "6" to lzma_str_to_filters() then lzma_str_from_filters()
* will produce a string containing "lzma2".
*
* \param str On success *str will be set to point to an
* allocated string describing the given filter chain.
* Old value is ignored. On error *str is always set
* to NULL.
* \param filters Array of 1-4 filters and a terminating element
* with .id = LZMA_VLI_UNKNOWN.
* \param flags Bitwise-or of zero or more of the flags
* LZMA_STR_ENCODER, LZMA_STR_DECODER,
* LZMA_STR_GETOPT_LONG, and LZMA_STR_NO_SPACES.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
*
* \return - LZMA_OK
* - LZMA_OPTIONS_ERROR: Empty filter chain
* (filters[0].id == LZMA_VLI_UNKNOWN) or the filter chain
* includes a Filter ID that is not supported by this function.
* - LZMA_MEM_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_str_from_filters(
char **str, const lzma_filter *filters, uint32_t flags,
const lzma_allocator *allocator)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief List available filters and/or their options (for help message)
*
* If a filter_id is given then only one line is created which contains the
* filter name. If LZMA_STR_ENCODER or LZMA_STR_DECODER is used then the
* options required for encoding or decoding are listed on the same line too.
*
* If filter_id is LZMA_VLI_UNKNOWN then all filters that can be used in
* the .xz format are listed:
*
* - If neither LZMA_STR_ENCODER nor LZMA_STR_DECODER is used then
* the supported filter names are listed on a single line separated
* by spaces.
*
* - If LZMA_STR_ENCODER or LZMA_STR_DECODER is used then filters and
* the supported options are listed one filter per line. There won't
* be a '\n' after the last filter.
*
* - If LZMA_STR_ALL_FILTERS is used then the list will also include
* those filters that cannot be used in the .xz format (LZMA1).
*
* \param str On success *str will be set to point to an
* allocated string listing the filters and options.
* Old value is ignored. On error *str is always set
* to NULL.
* \param filter_id Filter ID or LZMA_VLI_UNKNOWN.
* \param flags Bitwise-or of zero or more of the flags
* LZMA_STR_ALL_FILTERS, LZMA_STR_ENCODER,
* LZMA_STR_DECODER, and LZMA_STR_GETOPT_LONG.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
*
* \return - LZMA_OK
* - LZMA_OPTIONS_ERROR: Unsupported flags
* - LZMA_MEM_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_str_list_filters(
char **str, lzma_vli filter_id, uint32_t flags,
const lzma_allocator *allocator)
lzma_nothrow lzma_attr_warn_unused_result;

View File

@ -19,6 +19,7 @@ liblzma_la_SOURCES += \
common/index.h \
common/stream_flags_common.c \
common/stream_flags_common.h \
common/string_conversion.c \
common/vli_size.c
if COND_THREADS

File diff suppressed because it is too large Load Diff

View File

@ -115,4 +115,7 @@ global:
lzma_stream_decoder_mt;
lzma_lzip_decoder;
lzma_filters_free;
lzma_str_to_filters;
lzma_str_from_filters;
lzma_str_list_filters;
} XZ_5.2;

View File

@ -130,4 +130,7 @@ global:
lzma_stream_decoder_mt;
lzma_lzip_decoder;
lzma_filters_free;
lzma_str_to_filters;
lzma_str_from_filters;
lzma_str_list_filters;
} XZ_5.2;