1#!/bin/sh
2#
3# Copyright (C) 2019 Free Software Foundation, Inc.
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program.  If not, see <https://www.gnu.org/licenses/>.
17#
18
# This script checks whether the files in the GNU gettext package
# have the required copyright headers or license notices.
21#
22# Copyright notices have two purposes:
23#
24#  1. They tell the people who obtain the package (as a git checkout,
25#     source tarball, or binary package) what they are allowed to do
26#     with it.
27#
28#  2. They provide the legal basis for suing people who infringe on
29#     the copyright of the package (or part of it).  In particular,
30#     they should prove that the infringer knew what they were doing
31#     when they took a source file (or part of it) and used it in
32#     a way that is not compliant with the license.
33#
34# For the purpose 1, a file COPYING at the top-level of the package
35# and a reference to it in the main README are all that's needed.
36#
# For the purpose 2, we put copyright notices in each file that we
# don't want to see abused.  See also the section "Copyright Notices"
39# in the 'Information for maintainers of GNU software',
40# <https://www.gnu.org/prep/maintain/html_node/Copyright-Notices.html>.
41#
42# This script helps with purpose 2.  The input of this script is a
43# source tarball, not a git checkout.  Rationale:
44# - It is convenient to add files to a git repository, without having
45#   to attach a copyright notice to it.
46# - Files that we don't include in the source tarball are not so valuable
47#   that we would like to sue infringers over them.
48#
49# We classify the files in several categories:
50# - Source code that ends up being executed by end users (in source or
51#   compiled form).  This is the most valuable one and needs copyright
52#   notices.
53# - Source of large documentation.  This is valuable too, and needs
54#   copyright notices.
55# - Source code of build infrastructure.
56# - Files that contain convenience information for programmers and
57#   packagers.
# The latter categories are not valuable enough to warrant suing
# infringers.
60#
61# Not all files qualify for a copyright header.  Namely, since copyright
62# applies to the expression of an idea (think roughly of the program as
63# an art work), it makes no sense to attach a copyright header to a file
64# which different programmers would write in the same way.  This includes
65# tiny files, as well as files that contain only machine-generated data.
66
67# Usage: check-copyright-headers UNPACKED-GETTEXT-TARBALL
68
# Name under which the script was invoked; used as prefix of diagnostics.
progname="$0"

# Exactly one argument is accepted: the directory of the unpacked tarball.
case $# in
  1) ;;
  *)
    echo "Usage: check-copyright-headers UNPACKED-GETTEXT-TARBALL" 1>&2
    exit 1
    ;;
esac

# The argument must designate an existing directory...
dir="$1"
if [ ! -d "$dir" ]; then
  echo "*** $progname: '$dir' is not a directory." 1>&2
  exit 1
fi
# ... that is not a git checkout (recognized by its .git subdirectory).
[ ! -d "$dir/.git" ] || {
  echo "*** $progname: '$dir' is a git checkout, not an unpacked tarball." 1>&2
  exit 1
}
85
# func_head_matches COUNT FILE GREP-OPTION PATTERN
# tests whether one of the first COUNT lines of FILE, a file name relative
# to $dir, matches PATTERN.  GREP-OPTION is -e if PATTERN is a basic regular
# expression, or -F if PATTERN is a fixed string.
# Return code: 0 if some line matches, nonzero otherwise.
func_head_matches ()
{
  head -n "$1" "$dir/$2" | LC_ALL=C grep "$3" "$4" >/dev/null
}

# func_check_file FILE
# checks FILE, a file name relative to $dir.
# First, files that are exempted by their name are accepted; then the
# beginning of the file is searched for a copyright notice, a public domain
# notice, a delegation notice, or a "generated file" marker; finally, very
# short files are accepted.
# Return code: 0 if OK, 1 if missing a copyright header.
func_check_file ()
{
  case "/$1" in

    */tests/* | *-tests/* )
      # A test file.
      # We are not interested in suing infringers of test code.
      # In fact, anyone can apply our test suites to even proprietary software.
      # And this is even welcome (regarding the competing implementations of
      # the libintl functions), because it helps in portability of software
      # that uses the libintl API.
      return 0 ;;

    */modules/* )
      # Gnulib modules are not valuable enough to warrant suing infringers.
      # Also, they often don't have much programmer expression.
      return 0 ;;

    /gettext-tools/doc/ISO_3166 | /gettext-tools/doc/ISO_3166_de | \
    /gettext-tools/doc/ISO_639 | /gettext-tools/doc/ISO_639-2 | \
    /gettext-tools/examples/hello-*/m4/Makefile.am | \
    /gettext-tools/examples/hello-objc-gnustep/po/LocaleAliases )
      # These are mostly data.  They don't have much programmer expression.
      return 0 ;;

    */ChangeLog* )
      # ChangeLog files are convenience information, not worth suing for.
      return 0 ;;

    */AUTHORS | */COPYING* | */DEPENDENCIES | */FILES | */HACKING | \
    */INSTALL* | */NEWS | */PACKAGING | */README* | */THANKS )
      # These files contain convenience information, not worth suing for.
      return 0 ;;

    /os2/* )
      # Old stuff, not worth suing for.
      return 0 ;;

    */libcroco/* | */libxml/* )
      # We repackage libcroco and libxml as sources and are not interested
      # in attaching our own copyright header to each.
      return 0 ;;

    /gettext-tools/examples/hello-*-gnome/m4/*.m4 | \
    /gettext-tools/projects/GNOME/teams.html )
      # These files come from the GNOME project.
      # We are not interested in attaching our own copyright header to each.
      return 0 ;;

    /gettext-tools/examples/hello-*-kde/admin/* | \
    /gettext-tools/projects/KDE/teams.html )
      # These files come from the KDE project.
      # We are not interested in attaching our own copyright header to each.
      return 0 ;;

    /gettext-tools/examples/hello-*-wxwidgets/m4/wxwidgets.m4 )
      # These files come from the wxwidgets project.
      # We are not interested in attaching our own copyright header to each.
      return 0 ;;

    /gettext-tools/projects/TP/teams.html )
      # These files come from the translationproject.org project.
      # We are not interested in attaching our own copyright header to each.
      return 0 ;;

    /gettext-tools/misc/DISCLAIM )
      # This is a form, used for communication with the FSF.
      return 0 ;;

    /gettext-tools/misc/archive.dir.tar )
      # This is an archive of files that were part of earlier gettext releases.
      # As a binary file, it cannot have a copyright header.
      return 0 ;;

    *.gmo )
      # These are binary files. FOO.gmo is generated from FOO.po, which is
      # also distributed.
      return 0 ;;

    *.class )
      # These are binary files. FOO.class is generated from FOO.java, which is
      # also distributed.
      return 0 ;;

    /gettext-runtime/intl-csharp/doc/* | \
    /gettext-runtime/intl-java/javadoc2/* | \
    /gettext-runtime/doc/matrix.texi | \
    /gettext-tools/doc/iso-639.texi | /gettext-tools/doc/iso-639-2.texi | \
    /gettext-tools/doc/iso-3166.texi | \
    */man/*.[13].html | \
    */*_[0-9].html | */*_[0-9][0-9].html | \
    */*_all.html | */*_toc.html | */*_fot.html | */*_abt.html | \
    *.priv.h | *.vt.h | \
    /libtextstyle/lib/libtextstyle.sym.in )
      # These are generated files.  We ship their sources as well.
      return 0 ;;

    *.diff )
      # These are patches to existing files.  It is understood that the patch
      # preserves the copyright status of the file, that is, that the .diff
      # file delegates its copyright status to the file.
      return 0 ;;

    */quot.sed | */boldquot.sed | */en@quot.header | */en@boldquot.header )
      # The whole file name is quoted, so that a directory name containing
      # whitespace or wildcard characters is passed to 'test' as one word.
      if test -f "$dir/$(dirname "$1")/Rules-quot"; then
        # These files are covered by the notice in the Rules-quot file in the
        # same directory.
        return 0
      fi
      ;;

    /gettext-tools/libgrep/kwset.c )
      # The file has a copyright header, with the Copyright line spread
      # across two lines.
      return 0 ;;

    */texinfo.tex )
      # The file has a copyright header, with the Copyright line spread
      # across three lines.
      return 0 ;;

    *.texi )
      if func_head_matches 100 "$1" -e 'Copyright ([Cc]) '; then
        # The file has a copyright notice, although not exactly at the beginning.
        return 0
      fi
      ;;

    *.rc )
      # Here the entire file is searched, not only its beginning.
      if LC_ALL=C grep -e 'This program is free software[:;] you can redistribute it' \
                       -e 'This library is free software[:;] you can redistribute it' \
                       "$dir/$1" >/dev/null; then
        # The file carries a copyright notice in it.
        return 0
      fi
      ;;

  esac

  # Files that were not accepted by name alone are now checked by contents.

  if func_head_matches 15 "$1" -e 'Copyright ([Cc]) '; then
    # The file has a copyright header.
    return 0
  fi

  # <https://www.gnu.org/prep/maintain/html_node/Copyright-Notices.html>
  # says that the (C) "can be omitted entirely; the word 'Copyright' suffices".
  if func_head_matches 15 "$1" -e 'Copyright .* Free Software Foundation'; then
    # The file has a copyright header.
    return 0
  fi

  # <https://www.gnu.org/prep/maintain/html_node/Copyright-Notices.html>
  # says "If a file has been explicitly placed in the public domain, then
  # instead of a copyright notice, it should have a notice saying explicitly
  # that it is in the public domain."
  if func_head_matches 15 "$1" -e 'This .* is in the public domain\.' \
     || func_head_matches 15 "$1" -F 'In the public domain.' \
     || func_head_matches 15 "$1" -F 'Public domain.' \
     || func_head_matches 15 "$1" -F 'public-domain implementation'; then
    # The file has a public domain notice.
    return 0
  fi

  if func_head_matches 15 "$1" -F 'This file is distributed under the same license as '; then
    # The file has a notice that delegates to another copyright notice.
    return 0
  fi

  # Markers of generated files.  'grep -F' is used instead of the obsolescent
  # 'fgrep', for which recent versions of GNU grep emit a warning.
  if func_head_matches 1 "$1" -F 'Generated from configure.ac by autoheader.' \
     || func_head_matches 1 "$1" -F 'code produced by gperf version' \
     || func_head_matches 1 "$1" -F 'Creator     : groff version' \
     || func_head_matches 1 "$1" -F 'DO NOT MODIFY THIS FILE!  It was generated by help2man' \
     || func_head_matches 3 "$1" -F 'Generated automatically by gen-uni-tables.c for Unicode' \
     || func_head_matches 6 "$1" -F 'Generated automatically by the gen-uninames utility.'; then
    # These are generated files.  We ship their sources as well.
    return 0
  fi

  # The command substitution is intentionally left unquoted: some 'wc'
  # implementations pad the number with blanks, which field splitting removes.
  if test $(LC_ALL=C wc -l < "$dir/$1") -le 8; then
    # The file has very few lines.
    # It thus doesn't have much programmer expression.
    return 0
  fi

  return 1
}
274
# Check every regular file below $dir, in a locale-independent sorted order,
# and report on stderr each file that lacks a copyright header.
# Exit code: 0 if all files are OK, 1 if at least one file was reported.
#
# The file names are read line by line from a pipe, rather than being
# produced by a word-split command substitution, so that a file name
# containing spaces, tabs, or wildcard characters is processed as a single
# name.  (File names that contain newlines are still not supported.)
#
# In many shells the last command of a pipeline runs in a subshell, so that
# an assignment to 'fail' would be lost after the loop.  Therefore the
# verdict is passed on through the exit status of the pipeline.
if (cd "$dir" && find . -type f -print) \
   | sed -e 's,^[.]/,,' \
   | LC_ALL=C sort \
   | {
       fail=false
       while IFS= read -r file; do
         func_check_file "$file" || {
           printf '%s\n' "*** Missing copyright header in file $file" 1>&2
           fail=true
         }
       done
       # The status of this command is the status of the whole pipeline.
       test "$fail" = false
     }; then
  exit 0
else
  exit 1
fi
288