From 6a4a4a7d2667837dc824c26fcb19ed6ca5aff645 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 17 Jul 2022 21:36:25 +0300 Subject: [PATCH] xzgrep: Add more LC_ALL=C to avoid bugs with multibyte characters. Also replace one use of expr with printf. The rationale for LC_ALL=C was already mentioned in 69d1b3fc29677af8ade8dc15dba83f0589cb63d6 that fixed a security issue. However, unrelated uses weren't changed in that commit yet. POSIX says that with sed and such tools one should use LC_ALL=C to ensure predictable behavior when strings contain byte sequences that aren't valid multibyte characters in the current locale. See under "Application usage" in here: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html With GNU sed invalid multibyte strings would work without this; it's documented in its Texinfo manual. Some other implementations aren't so forgiving. --- src/scripts/xzgrep.in | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/scripts/xzgrep.in b/src/scripts/xzgrep.in index fce7940a..c851c7ed 100644 --- a/src/scripts/xzgrep.in +++ b/src/scripts/xzgrep.in @@ -75,9 +75,10 @@ while test $# -ne 0; do # For example, "grep -25F" is equivalent to "grep -C25 -F". If only # digits are specified like "grep -25" we don't get here because the # above pattern in the case-statement doesn't match such strings. - arg2=-\'$(expr "X${option}X" : 'X-.[0-9]*\(.*\)' | sed "$escape") + arg2=-\'$(LC_ALL=C expr "X${option}X" : 'X-.[0-9]*\(.*\)' | + LC_ALL=C sed "$escape") eval "set -- $arg2 "'${1+"$@"}' - option=$(expr "X$option" : 'X\(-.[0-9]*\)');; + option=$(LC_ALL=C expr "X$option" : 'X\(-.[0-9]*\)');; (--binary-*=* | --[lm]a*=* | --reg*=*) # These options require an argument and an argument has been provided # with the --foo=argument syntax. All is good. @@ -87,7 +88,7 @@ while test $# -ne 0; do # If it isn't, display an error and exit. case ${1?"$option option requires an argument"} in (*\'*) - optarg=" '"$(printf '%sX\n' "$1" | sed "$escape");; + optarg=" '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");; (*) optarg=" '$1'";; esac @@ -99,7 +100,8 @@ while test $# -ne 0; do (*) case $option in (*\'*) - operands="$operands '"$(printf '%sX\n' "$option" | sed "$escape");; + operands="$operands '"$(printf '%sX\n' "$option" | + LC_ALL=C sed "$escape");; (*) operands="$operands '$option'";; esac @@ -136,7 +138,7 @@ while test $# -ne 0; do case $option in (*\'?*) - option=\'$(expr "X${option}X" : 'X\(.*\)' | sed "$escape");; + option=\'$(printf '%sX\n' "$option" | LC_ALL=C sed "$escape");; (*) option="'$option'";; esac @@ -153,7 +155,7 @@ eval "set -- $operands "'${1+"$@"}' if test $have_pat -eq 0; then case ${1?"Missing pattern; try \`${0##*/} --help' for help"} in (*\'*) - grep="$grep -- '"$(printf '%sX\n' "$1" | sed "$escape");; + grep="$grep -- '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");; (*) grep="$grep -- '$1'";; esac