Add build-aux/license-check.sh

This helps in spotting files that lack SPDX license identifier
and which haven't been explicitly white listed either. The script
requires the .git directory to be present as only the files that
are in the Git repository are checked.

XZ Utils isn't FSFE REUSE compliant for now.
This commit is contained in:
Lasse Collin 2024-05-22 15:21:53 +03:00
parent 9ae2ebc1e5
commit f3434ecfcb
2 changed files with 175 additions and 0 deletions

View File

@ -53,6 +53,7 @@ EXTRA_DIST = \
PACKAGERS \
TODO \
autogen.sh \
build-aux/license-check.sh \
build-aux/manconv.sh \
build-aux/version.sh \
po/xz.pot-header

174
build-aux/license-check.sh Normal file
View File

@ -0,0 +1,174 @@
#!/bin/sh
# SPDX-License-Identifier: 0BSD
###############################################################################
#
# Look for missing license info in xz.git
#
# The project doesn't conform to the FSFE REUSE specification for now.
# Instead, this script helps in finding files that lack license info.
# Pass -v as an argument to get license info from all files in xz.git or,
# when .git isn't available, from files extracted from a release tarball
# (in case of a release tarball, the tree must be clean of any extra files).
#
# NOTE: This uses grep and xargs with options that aren't in POSIX.
#
###############################################################################
#
# Author: Lasse Collin
#
###############################################################################
# Print good files too if -v is passed as an argument.
VERBOSE=false
case $1 in
'')
;;
-v)
VERBOSE=true
;;
*)
echo "Usage: $0 [-v]"
exit 1
;;
esac
# Use the C locale so that sorting is always the same.
LC_ALL=C
export LC_ALL
# String to match the SPDX license identifier tag.
# Spell it here in a way that doesn't match regular grep patterns.
SPDX_LI='SPDX''-License-''Identifier'':'
# Pattern for files that don't contain SPDX tags but they are under
# a free license that isn't 0BSD.
PAT_UNTAGGED_MISC='^COPYING\.
^INSTALL\.generic$'
# Pattern for files that are 0BSD but don't contain SPDX tags.
# (The two file format specification files are public domain but
# they can be treated as 0BSD too.)
PAT_UNTAGGED_0BSD='^(.*/)?\.gitattributes$
^(.*/)?\.gitignore$
^\.github/SECURITY\.md$
^AUTHORS$
^COPYING$
^ChangeLog$
^INSTALL$
^NEWS$
^PACKAGERS$
^(.*/)?README$
^THANKS$
^TODO$
^(.*/)?[^/]+\.txt$
^po/LINGUAS$
^tests/xzgrep_expected_output$
^tests/files/[^/]+\.(lz|lzma|xz)$'
# Pattern for files that must be ignored when Git isn't available. This is
# useful when this script is run right after extracting a release tarball.
PAT_TARBALL_IGNORE='^(m4/)?[^/]*\.m4$
^(.*/)?Makefile\.in(\.in)?$
^(po|po4a)/.*[^.]..$
^ABOUT-NLS$
^build-aux/(config\..*|ltmain\.sh|[^.]*)$
^config\.h\.in$
^configure$'
# Go to the top source dir.
cd "$(dirname "$0")/.." || exit 1
# Get the list of files to check from git if possible.
# Otherwise list the whole source tree. This script should pass
# if it is run right after extracting a release tarball.
if test -d .git && type git > /dev/null 2>&1; then
FILES=$(git ls-files) || exit 1
IS_TARBALL=false
else
FILES=$(find . -type f) || exit 1
FILES=$(printf '%s\n' "$FILES" | sed 's,^\./,,')
IS_TARBALL=true
fi
# Sort to keep the order consistent.
FILES=$(printf '%s\n' "$FILES" | sort)
# Find the tagged files.
TAGGED=$(printf '%s\n' "$FILES" | xargs -r -d '\n' grep -l "$SPDX_LI")
# Find the tagged 0BSD files.
TAGGED_0BSD=$(printf '%s\n' "$TAGGED" \
| xargs -r -d '\n' grep -l "$SPDX_LI 0BSD")
# Find the tagged non-0BSD files, that is, remove the 0BSD-tagged files
# from the list of tagged files.
TAGGED_MISC=$(printf '%s\n%s\n' "$TAGGED" "$TAGGED_0BSD" | sort | uniq -u)
# Remove the tagged files from the list.
FILES=$(printf '%s\n%s\n' "$FILES" "$TAGGED" | sort | uniq -u)
# Find the intentionally-untagged files.
UNTAGGED_0BSD=$(printf '%s\n' "$FILES" | grep -E "$PAT_UNTAGGED_0BSD")
UNTAGGED_MISC=$(printf '%s\n' "$FILES" | grep -E "$PAT_UNTAGGED_MISC")
# Remove the intentionally-untagged files from the list.
FILES=$(printf '%s\n' "$FILES" | grep -Ev \
-e "$PAT_UNTAGGED_0BSD" -e "$PAT_UNTAGGED_MISC")
# FIXME: Allow untagged translations if they have a public domain notice.
# These are old translations that haven't been updated after 2024-02-14.
# Eventually these should go away.
PD_PO=$(printf '%s\n' "$FILES" | grep '\.po$' | \
xargs -r -d '\n' grep -Fl '# This file is put in the public domain.')
if test -n "$PD_PO"; then
# Remove the public domain .po files from the list.
FILES=$(printf '%s\n%s\n' "$FILES" "$PD_PO" | sort | uniq -u)
fi
# Remove generated files from the list which don't have SPDX tags but which
# can be present in release tarballs. This step is skipped when the file list
# is from "git ls-files".
GENERATED=
if $IS_TARBALL; then
GENERATED=$(printf '%s\n' "$FILES" | grep -E "$PAT_TARBALL_IGNORE")
FILES=$(printf '%s\n' "$FILES" | grep -Ev "$PAT_TARBALL_IGNORE")
fi
if $VERBOSE; then
printf '# Tagged 0BSD files:\n%s\n\n' "$TAGGED_0BSD"
printf '# Intentionally untagged 0BSD:\n%s\n\n' "$UNTAGGED_0BSD"
# FIXME: Remove when no longer needed.
if test -n "$PD_PO"; then
printf '# Old public domain translations:\n%s\n\n' "$PD_PO"
fi
printf '# Tagged non-0BSD files:\n%s\n\n' "$TAGGED_MISC"
printf '# Intentionally untagged miscellaneous: \n%s\n\n' \
"$UNTAGGED_MISC"
if test -n "$GENERATED"; then
printf '# Generated files whose license was NOT checked:\n%s\n\n' \
"$GENERATED"
fi
fi
# Look for files with an unknown license and set the exit status accordingly.
STATUS=0
if test -n "$UNTAGGED"; then
printf '# ERROR: Licensing is unclear:\n%s\n' "$UNTAGGED"
STATUS=1
fi
exit "$STATUS"