#!/bin/sh
# SPDX-License-Identifier: 0BSD

###############################################################################
#
# Look for missing license info in xz.git
#
# The project doesn't conform to the FSFE REUSE specification for now.
# Instead, this script helps in finding files that lack license info.
# Pass -v as an argument to get license info from all files in xz.git or,
# when .git isn't available, from files extracted from a release tarball
# (in case of a release tarball, the tree must be clean of any extra files).
#
# NOTE: This relies on non-POSIX xargs -0. It's supported on GNU and *BSDs.
#
###############################################################################
#
# Author: Lasse Collin
#
###############################################################################

# Print good files too if -v is passed as an argument.
#
# Only the first argument is examined. No argument (or an empty one)
# selects the quiet mode, -v selects the verbose mode, and anything else
# prints a usage message and exits with status 1.
VERBOSE=false
case $1 in
	'')
		;;
	-v)
		VERBOSE=true
		;;
	*)
		# A usage error is a diagnostic, so it goes to stderr
		# instead of being mixed with the report on stdout.
		printf 'Usage: %s [-v]\n' "$0" >&2
		exit 1
		;;
esac


# Use the C locale so that sorting is always the same.
# The variable is exported so that the external tools run by this
# script see it too.
LC_ALL=C
export LC_ALL


# String to match the SPDX license identifier tag.
# Spell it here in a way that doesn't match regular grep patterns.
# (The adjacent quoted pieces are concatenated by the shell into
# a single string.)
SPDX_LI='SPDX''-License-''Identifier'':'

# Pattern for files that don't contain SPDX tags but they are under
# a free license that isn't 0BSD.
#
# This is a newline-separated list of extended regular expressions for
# "grep -E": a filename matches if it matches any one of the lines. Thus
# the value must not contain empty or otherwise unintended lines.
PAT_UNTAGGED_MISC='^COPYING\.
^INSTALL\.generic$'

# Pattern for files that are 0BSD but don't contain SPDX tags.
# (The two file format specification files are public domain but
# they can be treated as 0BSD too.)
#
# This is a newline-separated list of extended regular expressions for
# "grep -E": a filename matches if it matches any one of the lines. Thus
# the value must not contain empty or otherwise unintended lines.
PAT_UNTAGGED_0BSD='^(.*/)?\.gitattributes$
^(.*/)?\.gitignore$
^\.github/SECURITY\.md$
^AUTHORS$
^COPYING$
^ChangeLog$
^INSTALL$
^NEWS$
^PACKAGERS$
^(.*/)?README$
^THANKS$
^TODO$
^(.*/)?[^/]+\.txt$
^doc/SHA256SUMS$
^po/LINGUAS$
^src/common/w32_application\.manifest$
^tests/xzgrep_expected_output$
^tests/files/[^/]+\.(lz|lzma|xz)$'

# Pattern for files that must be ignored when Git isn't available. This is
# useful when this script is run right after extracting a release tarball.
#
# This is a newline-separated list of extended regular expressions for
# "grep -E": a filename matches if it matches any one of the lines. Thus
# the value must not contain empty or otherwise unintended lines.
PAT_TARBALL_IGNORE='^(m4/)?[^/]*\.m4$
^(.*/)?Makefile\.in(\.in)?$
^(po|po4a)/.*[^.]..$
^ABOUT-NLS$
^build-aux/(config\..*|ltmain\.sh|[^.]*)$
^config\.h\.in$
^configure$'


# Go to the top source dir. It is the parent of the directory that
# contains this script. Give up if the directory cannot be entered.
cd "$(dirname "$0")/.." || exit 1

# Get the list of files to check from git if possible.
# Otherwise list the whole source tree. This script should pass
# if it is run right after extracting a release tarball.
#
# .git is tested with -e instead of -d because in linked worktrees and
# submodules .git is a regular file that points to the real repository.
#
# IS_TARBALL tells the later steps if the list came from the file system
# (true) or from git (false).
if test -e .git && type git > /dev/null 2>&1; then
	FILES=$(git ls-files) || exit 1
	IS_TARBALL=false
else
	FILES=$(find . -type f) || exit 1

	# Strip the leading "./" so that the names look the same as
	# the names from "git ls-files".
	FILES=$(printf '%s\n' "$FILES" | sed 's,^\./,,')
	IS_TARBALL=true
fi

# Sort to keep the order consistent.
FILES=$(printf '%s\n' "$FILES" | sort)


# list_files_with STRING LIST
#
# Print the names of the files in the newline-separated LIST that contain
# the fixed string STRING. The names are printed in the same order as
# they are in LIST.
#
# Nothing is done if LIST is empty. Without this check the single empty
# line from printf would make xargs run grep on a file with an empty name.
list_files_with()
{
	test -n "$2" || return 0
	printf '%s\n' "$2" \
		| tr '\n' '\000' | xargs -0r grep -Fl -e "$1" --
}

# list_subtract LIST REMOVE
#
# Print, in sorted order, the lines of the newline-separated LIST that
# aren't in REMOVE. Neither list may contain duplicate lines and every
# line of REMOVE must be in LIST too: the lists are concatenated and
# only the lines that occur exactly once are kept.
list_subtract()
{
	if test -z "$2"; then
		# There is nothing to remove. This has to be a special
		# case because otherwise the empty REMOVE would add
		# an empty line that "uniq -u" would let through.
		test -z "$1" || printf '%s\n' "$1" | sort
		return 0
	fi

	printf '%s\n%s\n' "$1" "$2" | sort | uniq -u
}


# Find the tagged files.
TAGGED=$(list_files_with "$SPDX_LI" "$FILES")

# Find the tagged 0BSD files.
TAGGED_0BSD=$(list_files_with "$SPDX_LI 0BSD" "$TAGGED")

# Find the tagged non-0BSD files, that is, remove the 0BSD-tagged files
# from the list of tagged files.
TAGGED_MISC=$(list_subtract "$TAGGED" "$TAGGED_0BSD")


# Remove the tagged files from the list.
FILES=$(list_subtract "$FILES" "$TAGGED")

# Find the intentionally-untagged files.
UNTAGGED_0BSD=$(printf '%s\n' "$FILES" | grep -E "$PAT_UNTAGGED_0BSD")
UNTAGGED_MISC=$(printf '%s\n' "$FILES" | grep -E "$PAT_UNTAGGED_MISC")

# Remove the intentionally-untagged files from the list.
FILES=$(printf '%s\n' "$FILES" | grep -Ev \
		-e "$PAT_UNTAGGED_0BSD" -e "$PAT_UNTAGGED_MISC")


# FIXME: Allow untagged translations if they have a public domain notice.
# These are old translations that haven't been updated after 2024-02-14.
# Eventually these should go away.
PD_PO=$(list_files_with '# This file is put in the public domain.' \
		"$(printf '%s\n' "$FILES" | grep '\.po$')")

# Remove the public domain .po files from the list.
FILES=$(list_subtract "$FILES" "$PD_PO")


# Remove generated files from the list which don't have SPDX tags but which
# can be present in release tarballs. This step is skipped when the file list
# is from "git ls-files".
GENERATED=
if $IS_TARBALL; then
	GENERATED=$(printf '%s\n' "$FILES" | grep -E "$PAT_TARBALL_IGNORE")
	FILES=$(printf '%s\n' "$FILES" | grep -Ev "$PAT_TARBALL_IGNORE")
fi


# Print the lists of the files whose licensing is in order. The lists are
# read from the variables TAGGED_0BSD, UNTAGGED_0BSD, PD_PO, TAGGED_MISC,
# UNTAGGED_MISC, and GENERATED. The sections for PD_PO and GENERATED are
# omitted if those lists are empty.
print_good_files()
{
	printf '# Tagged 0BSD files:\n%s\n\n' "$TAGGED_0BSD"
	printf '# Intentionally untagged 0BSD:\n%s\n\n' "$UNTAGGED_0BSD"

	# FIXME: Remove when no longer needed.
	if test -n "$PD_PO"; then
		printf '# Old public domain translations:\n%s\n\n' "$PD_PO"
	fi

	printf '# Tagged non-0BSD files:\n%s\n\n' "$TAGGED_MISC"
	printf '# Intentionally untagged miscellaneous: \n%s\n\n' \
		"$UNTAGGED_MISC"

	if test -n "$GENERATED"; then
		printf '# Generated files whose license was NOT checked:\n%s\n\n' \
			"$GENERATED"
	fi
}

# The good files are listed only in the verbose mode.
if $VERBOSE; then
	print_good_files
fi


# Look for files with an unknown license and set the exit status accordingly.
# Whatever is still left in FILES wasn't recognized by any of the earlier
# steps. Such files are listed and the script exits with status 1.
STATUS=0
if test -n "$FILES"; then
	printf '# ERROR: Licensing is unclear:\n%s\n' "$FILES"
	STATUS=1
fi

exit "$STATUS"