diff --git a/Makefile.am b/Makefile.am index 5ba0dab2..43c9a336 100644 --- a/Makefile.am +++ b/Makefile.am @@ -53,6 +53,7 @@ EXTRA_DIST = \ PACKAGERS \ TODO \ autogen.sh \ + build-aux/license-check.sh \ build-aux/manconv.sh \ build-aux/version.sh \ po/xz.pot-header diff --git a/build-aux/license-check.sh b/build-aux/license-check.sh new file mode 100644 index 00000000..f352e1c8 --- /dev/null +++ b/build-aux/license-check.sh @@ -0,0 +1,174 @@ +#!/bin/sh +# SPDX-License-Identifier: 0BSD + +############################################################################### +# +# Look for missing license info in xz.git +# +# The project doesn't conform to the FSFE REUSE specification for now. +# Instead, this script helps in finding files that lack license info. +# Pass -v as an argument to get license info from all files in xz.git or, +# when .git isn't available, from files extracted from a release tarball +# (in case of a release tarball, the tree must be clean of any extra files). +# +# NOTE: This uses grep and xargs with options that aren't in POSIX. +# +############################################################################### +# +# Author: Lasse Collin +# +############################################################################### + +# Print good files too if -v is passed as an argument. +VERBOSE=false +case $1 in + '') + ;; + -v) + VERBOSE=true + ;; + *) + echo "Usage: $0 [-v]" + exit 1 + ;; +esac + + +# Use the C locale so that sorting is always the same. +LC_ALL=C +export LC_ALL + + +# String to match the SPDX license identifier tag. +# Spell it here in a way that doesn't match regular grep patterns. +SPDX_LI='SPDX''-License-''Identifier'':' + +# Pattern for files that don't contain SPDX tags but they are under +# a free license that isn't 0BSD. +PAT_UNTAGGED_MISC='^COPYING\. +^INSTALL\.generic$' + +# Pattern for files that are 0BSD but don't contain SPDX tags. +# (The two file format specification files are public domain but +# they can be treated as 0BSD too.) +PAT_UNTAGGED_0BSD='^(.*/)?\.gitattributes$ +^(.*/)?\.gitignore$ +^\.github/SECURITY\.md$ +^AUTHORS$ +^COPYING$ +^ChangeLog$ +^INSTALL$ +^NEWS$ +^PACKAGERS$ +^(.*/)?README$ +^THANKS$ +^TODO$ +^(.*/)?[^/]+\.txt$ +^po/LINGUAS$ +^tests/xzgrep_expected_output$ +^tests/files/[^/]+\.(lz|lzma|xz)$' + +# Pattern for files that must be ignored when Git isn't available. This is +# useful when this script is run right after extracting a release tarball. +PAT_TARBALL_IGNORE='^(m4/)?[^/]*\.m4$ +^(.*/)?Makefile\.in(\.in)?$ +^(po|po4a)/.*[^.]..$ +^ABOUT-NLS$ +^build-aux/(config\..*|ltmain\.sh|[^.]*)$ +^config\.h\.in$ +^configure$' + + +# Go to the top source dir. +cd "$(dirname "$0")/.." || exit 1 + +# Get the list of files to check from git if possible. +# Otherwise list the whole source tree. This script should pass +# if it is run right after extracting a release tarball. +if test -d .git && type git > /dev/null 2>&1; then + FILES=$(git ls-files) || exit 1 + IS_TARBALL=false +else + FILES=$(find . -type f) || exit 1 + FILES=$(printf '%s\n' "$FILES" | sed 's,^\./,,') + IS_TARBALL=true +fi + +# Sort to keep the order consistent. +FILES=$(printf '%s\n' "$FILES" | sort) + + +# Find the tagged files. +TAGGED=$(printf '%s\n' "$FILES" | xargs -r -d '\n' grep -l "$SPDX_LI") + +# Find the tagged 0BSD files. +TAGGED_0BSD=$(printf '%s\n' "$TAGGED" \ + | xargs -r -d '\n' grep -l "$SPDX_LI 0BSD") + +# Find the tagged non-0BSD files, that is, remove the 0BSD-tagged files +# from the list of tagged files. +TAGGED_MISC=$(printf '%s\n%s\n' "$TAGGED" "$TAGGED_0BSD" | sort | uniq -u) + + +# Remove the tagged files from the list. +FILES=$(printf '%s\n%s\n' "$FILES" "$TAGGED" | sort | uniq -u) + +# Find the intentionally-untagged files. +UNTAGGED_0BSD=$(printf '%s\n' "$FILES" | grep -E "$PAT_UNTAGGED_0BSD") +UNTAGGED_MISC=$(printf '%s\n' "$FILES" | grep -E "$PAT_UNTAGGED_MISC") + +# Remove the intentionally-untagged files from the list. +FILES=$(printf '%s\n' "$FILES" | grep -Ev \ + -e "$PAT_UNTAGGED_0BSD" -e "$PAT_UNTAGGED_MISC") + + +# FIXME: Allow untagged translations if they have a public domain notice. +# These are old translations that haven't been updated after 2024-02-14. +# Eventually these should go away. +PD_PO=$(printf '%s\n' "$FILES" | grep '\.po$' | \ + xargs -r -d '\n' grep -Fl '# This file is put in the public domain.') + +if test -n "$PD_PO"; then + # Remove the public domain .po files from the list. + FILES=$(printf '%s\n%s\n' "$FILES" "$PD_PO" | sort | uniq -u) +fi + + +# Remove generated files from the list which don't have SPDX tags but which +# can be present in release tarballs. This step is skipped when the file list +# is from "git ls-files". +GENERATED= +if $IS_TARBALL; then + GENERATED=$(printf '%s\n' "$FILES" | grep -E "$PAT_TARBALL_IGNORE") + FILES=$(printf '%s\n' "$FILES" | grep -Ev "$PAT_TARBALL_IGNORE") +fi + + +if $VERBOSE; then + printf '# Tagged 0BSD files:\n%s\n\n' "$TAGGED_0BSD" + printf '# Intentionally untagged 0BSD:\n%s\n\n' "$UNTAGGED_0BSD" + + # FIXME: Remove when no longer needed. + if test -n "$PD_PO"; then + printf '# Old public domain translations:\n%s\n\n' "$PD_PO" + fi + + printf '# Tagged non-0BSD files:\n%s\n\n' "$TAGGED_MISC" + printf '# Intentionally untagged miscellaneous: \n%s\n\n' \ + "$UNTAGGED_MISC" + + if test -n "$GENERATED"; then + printf '# Generated files whose license was NOT checked:\n%s\n\n' \ + "$GENERATED" + fi +fi + + +# Look for files with an unknown license and set the exit status accordingly. +STATUS=0 +if test -n "$UNTAGGED"; then + printf '# ERROR: Licensing is unclear:\n%s\n' "$UNTAGGED" + STATUS=1 +fi + +exit "$STATUS"