1#!/bin/sh
2#! -*-perl-*-
3
4# Update an FSF copyright year list to include the current year.
5
6# Copyright (C) 2009-2020 Free Software Foundation, Inc.
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 3, or (at your option)
11# any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program.  If not, see <https://www.gnu.org/licenses/>.
20#
21# Written by Jim Meyering and Joel E. Denny
22
23# This script updates an FSF copyright year list to include the current year.
24# Usage: update-copyright [FILE...]
25#
26# The arguments to this script should be names of files that contain
27# copyright statements to be updated.  The copyright holder's name
28# defaults to "Free Software Foundation, Inc." but may be changed to
29# any other name by using the "UPDATE_COPYRIGHT_HOLDER" environment
30# variable.
31#
32# For example, you might wish to use the update-copyright target rule
33# in maint.mk from gnulib's maintainer-makefile module.
34#
35# Iff a copyright statement is recognized in a file and the final
36# year is not the current year, then the statement is updated for the
37# new year and it is reformatted to:
38#
39#   1. Fit within 72 columns.
40#   2. Convert 2-digit years to 4-digit years by prepending "19".
41#   3. Expand copyright year intervals.  (See "Environment variables"
42#      below.)
43#
44# A warning is printed for every file for which no copyright
45# statement is recognized.
46#
47# Each file's copyright statement must be formatted correctly in
48# order to be recognized.  For example, each of these is fine:
49#
50#   Copyright @copyright{} 1990-2005, 2007-2009 Free Software
51#   Foundation, Inc.
52#
53#   # Copyright (C) 1990-2005, 2007-2009 Free Software
54#   # Foundation, Inc.
55#
56#   /*
57#    * Copyright &copy; 90,2005,2007-2009
58#    * Free Software Foundation, Inc.
59#    */
60#
61# However, the following format is not recognized because the line
62# prefix changes after the first line:
63#
64#   ## Copyright (C) 1990-2005, 2007-2009 Free Software
65#   #  Foundation, Inc.
66#
# However, any correctly formatted copyright statement following
# a non-matching copyright statement would be recognized.
69#
70# The exact conditions that a file's copyright statement must meet
71# to be recognized are:
72#
73#   1. It is the first copyright statement that meets all of the
74#      following conditions.  Subsequent copyright statements are
75#      ignored.
76#   2. Its format is "Copyright (C)", then a list of copyright years,
77#      and then the name of the copyright holder.
78#   3. The "(C)" takes one of the following forms or is omitted
79#      entirely:
80#
81#        A. (C)
82#        B. (c)
83#        C. @copyright{}
84#        D. &copy;
85#        E. ©
86#
87#   4. The "Copyright" appears at the beginning of a line, except that it
88#      may be prefixed by any sequence (e.g., a comment) of no more than
89#      5 characters -- including white space.
90#   5. Iff such a prefix is present, the same prefix appears at the
91#      beginning of each remaining line within the FSF copyright
92#      statement.  There is one exception in order to support C-style
93#      comments: if the first line's prefix contains nothing but
94#      whitespace surrounding a "/*", then the prefix for all subsequent
95#      lines is the same as the first line's prefix except with each of
96#      "/" and possibly "*" replaced by a " ".  The replacement of "*"
97#      by " " is consistent throughout all subsequent lines.
98#   6. Blank lines, even if preceded by the prefix, do not appear
99#      within the FSF copyright statement.
100#   7. Each copyright year is 2 or 4 digits, and years are separated by
101#      commas, "-", or "--".  Whitespace may appear after commas.
102#
103# Environment variables:
104#
105#   1. If UPDATE_COPYRIGHT_FORCE=1, a recognized FSF copyright statement
106#      is reformatted even if it does not need updating for the new
107#      year.  If unset or set to 0, only updated FSF copyright
108#      statements are reformatted.
109#   2. If UPDATE_COPYRIGHT_USE_INTERVALS=1, every series of consecutive
110#      copyright years (such as 90, 1991, 1992-2007, 2008) in a
111#      reformatted FSF copyright statement is collapsed to a single
112#      interval (such as 1990-2008).  If unset or set to 0, all existing
113#      copyright year intervals in a reformatted FSF copyright statement
114#      are expanded instead.
115#      If UPDATE_COPYRIGHT_USE_INTERVALS=2, convert a sequence with gaps
116#      to the minimal containing range.  For example, convert
117#      2000, 2004-2007, 2009 to 2000-2009.
118#   3. For testing purposes, you can set the assumed current year in
119#      UPDATE_COPYRIGHT_YEAR.
120#   4. The default maximum line length for a copyright line is 72.
121#      Set UPDATE_COPYRIGHT_MAX_LINE_LENGTH to use a different length.
122#   5. Set UPDATE_COPYRIGHT_HOLDER if the copyright holder is other
123#      than "Free Software Foundation, Inc.".
124
# This is a prologue that makes it possible to run a perl script as an
# executable on systems that comply with a POSIX version before POSIX:2017.
# On such systems, the usual invocation of an executable through execlp()
# or execvp() fails with ENOEXEC if it is a script that does not start
# with a #! line.  The script interpreter mentioned in the #! line has
# to be /bin/sh, because on GuixSD systems that is the only program that
# has a fixed file name.  The second line is essential for perl and is
# also useful for editing this file in Emacs.  The next two lines below
# are valid code in both sh and perl.  When executed by sh, they re-execute
# the script through the perl program found in $PATH.  The '-x' option
# is essential as well; without it, perl would re-execute the script
# through /bin/sh.  When executed by perl, the next two lines are a no-op.
#
# Perl switches used in the re-execution:
#   -w     enable warnings
#   -S     look up the script ("$0") via $PATH
#   -x     skip leading text up to the first #! line that mentions perl
#   -0777  read each input file as one single record (whole-file mode)
#   -p     run the script body once per record and print $_ afterwards
#   -i     edit the files named on the command line in place
# As a consequence, the code below sees the whole contents of one file
# in $_ and the file's name in $ARGV.
eval 'exec perl -wSx -0777 -pi "$0" "$@"'
     if 0;
139
my $VERSION = '2020-04-04.15:07'; # UTC
# The definition above must lie within the first 200 lines of this file
# (the time-stamp-line-limit set in the "Local variables" section at the
# end) in order for the Emacs time-stamp write hook to update it.  Its
# exact shape matters too: the hook looks for the time-stamp-start and
# time-stamp-end strings given in that same section.
# If you change this file with Emacs, please let the write hook
# do its job.  Otherwise, update this string manually.
145
146use strict;
147use warnings;
148
# Regex source for the word that opens every copyright statement.
my $copyright_re = 'Copyright';

# Regex source for the accepted spellings of the copyright sign:
# "(C)", "(c)", "@copyright{}", "\(co", "&copy;" and "©".
my $circle_c_re = '(?:\([cC]\)|@copyright\{}|\\\\\(co|&copy;|©)';

# Copyright holder to look for.  An unset or empty UPDATE_COPYRIGHT_HOLDER
# selects the FSF.
my $holder = $ENV{UPDATE_COPYRIGHT_HOLDER} || 'Free Software Foundation, Inc.';

# Maximum number of characters allowed in front of "Copyright" on its line.
my $prefix_max = 5;

# Maximum length of a reformatted copyright line.  Anything other than a
# non-zero string of digits in UPDATE_COPYRIGHT_MAX_LINE_LENGTH selects 72.
my $margin = $ENV{UPDATE_COPYRIGHT_MAX_LINE_LENGTH};
$margin = 72
  unless $margin && $margin =~ m/^\d+$/;

# Number of columns a tab in the line prefix is assumed to occupy.
my $tab_width = 8;

# Year to add to the list.  UPDATE_COPYRIGHT_YEAR overrides the clock
# only when it is a 4-digit number.
my $this_year = $ENV{UPDATE_COPYRIGHT_YEAR};
if (!$this_year || $this_year !~ m/^\d{4}$/)
  {
    # Element 5 of localtime's list is the year counted from 1900.
    $this_year = (localtime)[5] + 1900;
  }

# Unless the file consistently uses "\r\n" as the EOL, use "\n" instead.
# "Consistently" requires at least one "\r\n" and no "\n" that lacks a
# preceding "\r"; in particular, input without any newline gets "\n".
my $eol = (/\r\n/ && !/(?:^|[^\r])\n/) ? "\r\n" : "\n";
169
# Results of the search below; the update code that follows reads them.
#   $leading  text in front of "Copyright" on the statement's first line,
#             including the newline that precedes that line, if any
#   $prefix   prefix expected in front of every continuation line
#   $ws_re    regex source for one run of white space inside the statement,
#             which may include a line break followed by $prefix
#   $stmt_re  regex source matching the whole statement; it stays undefined
#             when no statement is recognized
my $leading;
my $prefix;
my $ws_re;
my $stmt_re;

# Try every "Copyright" that has at most $prefix_max characters in front
# of it on its line, and stop at the first one that starts a complete
# statement.
while (/(^|\n)(.{0,$prefix_max})$copyright_re/g)
  {
    $leading = "$1$2";
    $prefix = $2;
    if ($prefix =~ /^(\s*\/)\*(\s*)$/)
      {
        # C-style comment opener: on the following lines the "/" becomes
        # a space.
        $prefix =~ s,/, ,;
        my $prefix_ws = $prefix;
        $prefix_ws =~ s/\*/ /; # Only whitespace.
        # If the line after the current one starts with the all-whitespace
        # variant, then the "*" is not repeated on continuation lines.
        # (\G anchors at the end of the "Copyright" just matched.)
        if (/\G(?:[^*\n]|\*[^\/\n])*\*?\n$prefix_ws/)
          {
            $prefix = $prefix_ws;
          }
      }
    $ws_re = '[ \t\r\f]'; # \s without \n
    $ws_re =
      "(?:$ws_re*(?:$ws_re|\\n" . quotemeta($prefix) . ")$ws_re*)";

    # Match the holder literally: quote each whitespace-separated piece so
    # that characters such as "." or "(" in the name are not interpreted
    # as regex syntax.  As before, every single whitespace character of
    # the configured name stands for one $ws_re (the -1 keeps empty
    # fields, so leading, trailing and doubled whitespace still count).
    my $holder_re =
      join $ws_re, map { quotemeta } split (/\s/, $holder, -1);

    # Optional copyright sign, then a list of 2- or 4-digit years
    # separated by commas, "-" or "--", then the final year (captured),
    # then the holder.
    my $stmt_remainder_re =
      "(?:$ws_re$circle_c_re)?"
      . "$ws_re(?:(?:\\d\\d)?\\d\\d(?:,$ws_re?|--?))*"
      . "((?:\\d\\d)?\\d\\d)$ws_re$holder_re";
    if (/\G$stmt_remainder_re/)
      {
        # Capture group 1 is the statement proper (without $leading),
        # group 2 its final year.
        $stmt_re =
          quotemeta($leading) . "($copyright_re$stmt_remainder_re)";
        last;
      }
  }
# Rewrite the recognized statement in $_ (or warn when there is none).
# The statement is reformatted only if its final year differs from
# $this_year or UPDATE_COPYRIGHT_FORCE is set.
if (defined $stmt_re)
  {
    /$stmt_re/ or die; # Should never die.
    my $stmt = $1;
    my $final_year_orig = $2;

    # Split the statement right behind its final year.  $head is
    # "Copyright", the optional copyright sign and the year list; $tail
    # is the white space and holder name that follow.  All year rewriting
    # below is confined to $head, so digits that happen to occur in the
    # holder name are never mistaken for years.
    my $years_end = $+[2] - $-[1];
    my $head = substr ($stmt, 0, $years_end);
    my $tail = substr ($stmt, $years_end);

    # Handle two-digit year numbers like "98" and "99".
    my $final_year = $final_year_orig;
    $final_year <= 99
      and $final_year += 1900;

    my $outdated = $final_year != $this_year;
    if ($outdated)
      {
        # Update the year.  $head ends with the final year, so replace
        # exactly that occurrence rather than the first look-alike.
        substr ($head, -length ($final_year_orig))
          = "$final_year, $this_year";
      }
    if ($outdated || $ENV{'UPDATE_COPYRIGHT_FORCE'})
      {
        for my $part ($head, $tail)
          {
            # Normalize all whitespace including newline-prefix sequences.
            $part =~ s/$ws_re/ /g;

            # Put spaces after commas.
            $part =~ s/, ?/, /g;
          }

        # Convert 2-digit to 4-digit years.
        $head =~ s/(\b\d\d\b)/19$1/g;

        # Make the use of intervals consistent.
        if (!$ENV{UPDATE_COPYRIGHT_USE_INTERVALS})
          {
            # Expand every interval into the full list of its years.
            $head =~ s/(\d{4})--?(\d{4})/join(', ', $1..$2)/eg;
          }
        else
          {
            # Texinfo/TeX files get "--" as the range dash.
            my $ndash = $ARGV =~ /\.tex(i(nfo)?)?$/ ? "--" : "-";

            # Collapse each series of consecutive years into one interval.
            # After a dash any 4-digit year is accepted; after ", " only
            # the successor of the previously matched year is accepted.
            $head =~
              s/
                (\d{4})
                (?:
                  (,\ |--?)
                  ((??{
                    if   ($2 ne ', ') { '\d{4}'; }
                    elsif (!$3)       { $1 + 1;  }
                    else              { $3 + 1;  }
                  }))
                )+
              /$1$ndash$3/gx;

            # When it's 2, emit a single range encompassing all year numbers.
            $ENV{UPDATE_COPYRIGHT_USE_INTERVALS} == 2
              and $head =~ s/\b(\d{4})\b.*\b(\d{4})\b/$1$ndash$2/;
          }

        $stmt = "$head$tail";

        # Format within margin.
        my $stmt_wrapped;
        my $text_margin = $margin - length($prefix);
        if ($prefix =~ /^(\t+)/)
          {
            # Leading tabs occupy $tab_width columns each, not one.
            $text_margin -= length($1) * ($tab_width - 1);
          }
        # Keep the {1,N} quantifier below valid for very small margins;
        # words longer than the margin are still emitted unbroken.
        $text_margin < 1
          and $text_margin = 1;
        while (length $stmt)
          {
            # Take as many words as fit, or else one over-long word.
            if (($stmt =~ s/^(.{1,$text_margin})(?: |$)//)
                || ($stmt =~ s/^(\S+)(?: |$)//))
              {
                my $line = $1;
                # The first line keeps the original leading text; later
                # lines start with the EOL and the continuation prefix.
                $stmt_wrapped .= $stmt_wrapped ? "$eol$prefix" : $leading;
                $stmt_wrapped .= $line;
              }
            else
              {
                # Should be unreachable, but we don't want an infinite
                # loop if it can be reached.
                die;
              }
          }

        # Replace the old copyright statement.
        s/$stmt_re/$stmt_wrapped/;
      }
  }
else
  {
    print STDERR "$ARGV: warning: copyright statement not found\n";
  }
290
291# Hey Emacs!
292# Local variables:
293# coding: utf-8
294# mode: perl
295# indent-tabs-mode: nil
296# eval: (add-hook 'before-save-hook 'time-stamp)
297# time-stamp-line-limit: 200
298# time-stamp-start: "my $VERSION = '"
299# time-stamp-format: "%:y-%02m-%02d.%02H:%02M"
300# time-stamp-time-zone: "UTC0"
301# time-stamp-end: "'; # UTC"
302# End:
303