1#!/usr/bin/env perl
2#
3# Copyright © 2010-2014 Cisco Systems, Inc.  All rights reserved.
4# Copyright © 2011-2018 Inria.  All rights reserved.
5# $COPYRIGHT$
6#
7
8# Short version:
9#
10# This script automates the tedious task of updating copyright notices
11# in the tops of hwloc source files before committing back to
# the repository.  Set the environment variable
13# HWLOC_COPYRIGHT_SEARCH_NAME to a short (case-insensitive) name that
14# indicates your copyright line (e.g., "cisco"), and set the env
15# variable HWLOC_COPYRIGHT_FORMAL_NAME with your organization's formal
16# name and copyright statement (e.g., "Cisco Systems, Inc.  All rights
17# reserved.") before running the script.
18
19# More details:
20#
21# This is a simple script to traverse the tree looking for added and
22# changed files (via "git status").  Note that the search starts in
23# the current directory -- not the top-level directory.
24#
25# All added and changed files are examined.  If the special "See
26# COPYING in top-level directory" token is found, then lines above
27# that token are examined to find the "search" copyright name.
28#
29# - If the search name is found, that line is examined to see if the
30#   current year is in the copyright year range.  If it is not, the line
31#   is modified to include the current year.
32# - If the search name is not found, a new line is created in the
33#   copyright block of the file using the formal name and the current
34#   year.
35#
36# NOTE: this script currently doesn't handle multi-line copyright
37# statements, such as:
38#
39# Copyright © 2010 University of Blabbityblah and the Trustees of
40#                    Schblitbittyboo.  All rights reserved.
41#
42# Someone could certainly extend this script to do so, if they cared
43# (my organizations' copyright fits on a single line, so I wasn't
44# motivated to handle the multi-line case :-) ).
45#
46
47use strict;
48use Cwd;
49use Getopt::Long;
50use File::stat;
51use Fcntl ':mode';
52
# Set to true if the script should merely check for up-to-date copyrights.
# Will exit with status 111 if there are out of date copyrights which this
# script can correct.
my $CHECK_ONLY = 0;
# used by $CHECK_ONLY logic for bookkeeping: counts the files that would
# have been rewritten
my $would_replace = 0;

# Set to true to suppress most informational messages.  Only out of date files
# will be printed.
my $QUIET = 0;

# Set to true if we just want to see the help message
my $HELP = 0;

# Defaults
my $my_search_name = "Cisco";
my $my_formal_name = "Cisco Systems, Inc.  All rights reserved.";

# Patterns (used as regular expressions) that mark the end of the
# copyright block we are allowed to edit.
my @tokens;
push(@tokens, "See COPYING in top-level directory");
push(@tokens, "\\\$COPYRIGHT\\\$");

# Override the defaults if some values are set in the environment
$my_search_name = $ENV{HWLOC_COPYRIGHT_SEARCH_NAME}
    if (defined($ENV{HWLOC_COPYRIGHT_SEARCH_NAME}));
$my_formal_name = $ENV{HWLOC_COPYRIGHT_FORMAL_NAME}
    if (defined($ENV{HWLOC_COPYRIGHT_FORMAL_NAME}));

# Command-line options take precedence over the environment.  The names
# are deliberately not printed here: at this point --quiet has not been
# parsed yet and --search-name / --formal-name may still change them.
# "h" and "q" are declared explicitly so that the short forms do not
# depend on Getopt::Long's automatic abbreviation being enabled.
GetOptions(
    "help|h" => \$HELP,
    "quiet|q" => \$QUIET,
    "check-only" => \$CHECK_ONLY,
    "search-name=s" => \$my_search_name,
    "formal-name=s" => \$my_formal_name,
) or die "unable to parse options, stopped";
91
# Print the usage message and exit successfully when --help was given.
# The option names listed here must match the GetOptions() specification.
if ($HELP) {
    print <<EOT;
$0 [options] [directory]

[directory] is "." unless specified.

--help | -h          This help message
--quiet | -q         Only output critical messages to stdout
--check-only         exit(111) if there are files with copyrights to edit
--search-name=NAME   Set search name to NAME
--formal-name=NAME   Set formal name to NAME
EOT
    exit(0);
}
106
107#-------------------------------------------------------------------------------
# Informational print: behaves like print() on its argument list, but
# emits nothing when $QUIET is set.  Defined before its first use so that
# it can be called without parentheses, print-style.
sub quiet_print {
    print @_ unless $QUIET;
}
114
115#-------------------------------------------------------------------------------
116
quiet_print "==> Copyright search name: $my_search_name\n";
quiet_print "==> Copyright formal name: $my_formal_name\n";

# Get the year (localtime's 6th field is the number of years since 1900)
my $year = (localtime)[5] + 1900;
quiet_print "==> This year: $year\n";

# Move to the starting directory first and remember it as an absolute
# path: the upward search below must begin there (not in whatever
# directory the script was launched from), and we must be able to come
# back to it afterwards even if a relative path was given.
my $start = defined $ARGV[0] ? $ARGV[0] : cwd();
chdir($start) or die "Can't chdir to $start: $!";
$start = cwd();

# Find the top-level HWLOC source tree dir by walking up until a
# directory containing "hwloc" or "netloc" is found.
my $top = $start;
while (! -d "$top/hwloc" && ! -d "$top/netloc") {
    chdir("..") or die "Can't chdir to parent of $top: $!";
    my $parent = cwd();
    # No progress means we have reached the filesystem root.
    die "Can't find top-level hwloc directory"
        if (!defined($parent) || $parent eq $top);
    $top = $parent;
}
chdir($start) or die "Can't chdir to $start: $!";

quiet_print "==> Top-level hwloc dir: $top\n";
quiet_print "==> Current directory: $start\n";

# Files (relative to the current directory) that git reports as added or
# modified.
my @files = find_modified_files();

unless (@files) {
    quiet_print "No added / changed files -- nothing to do\n";
    exit(0);
}
145
# Examine each of the files and see if they need an updated copyright
foreach my $f (@files) {
    quiet_print "Processing added/changed file: $f\n";

    # Three-argument open with a lexical handle: the file name comes from
    # git and must never be interpreted as an open mode.
    open(my $in, '<', $f) || die "Can't open file: $f: $!";

    # Read in the file, and look for the special tokens; that's the
    # end of the copyright block that we're allowed to edit.  Do not
    # edit any copyright notices that may appear below that.

    my @lines;
    my $my_line_index;
    my $token_line_index;
    my $token;
    while (my $line = <$in>) {
        push(@lines, $line);

        # Only the FIRST token ends the editable block; later
        # occurrences (e.g. in the body of a script that mentions the
        # token) must not move the insertion point.
        next if (defined($token_line_index));

        foreach my $t (@tokens) {
            if ($line =~ /$t/) {
                $token_line_index = $#lines;
                $token = $t;
                last;
            }
        }

        # The search name is used as a case-insensitive regular
        # expression; the last matching line above the token wins.
        $my_line_index = $#lines
            if (!defined($token_line_index) && $line =~ /$my_search_name/i);
    }
    close($in);

    # If there was no copyright token, don't do anything
    if (!defined($token_line_index)) {
        quiet_print "==> WARNING: Did not find any end-of-copyright tokens!\n";
        quiet_print "    File left unchanged\n";
        next;
    }

    # don't modify ourself while running
    if ($f =~ m/update-my-copyright\.pl$/) {
        quiet_print "==> WARNING: Cannot modify myself while running!\n";
        quiet_print "    File left unchanged\n";
        next;
    }

    # Figure out the line prefix (comment leader) from the token line.
    # The prefix may legitimately be empty when the token starts the line.
    my ($prefix) = $lines[$token_line_index] =~ m/^(.*)$token/;
    $prefix = "" if (!defined($prefix));

    # Now act on it
    if (!defined($my_line_index)) {
        quiet_print "--- My copyright line not found; adding:\n";
        my $str = "${prefix}Copyright © $year $my_formal_name\n";
        quiet_print "    $str";
        # Insert the new notice immediately above the token line.
        $lines[$token_line_index] = $str . $lines[$token_line_index];
    } else {
        quiet_print "--- Found existing copyright line:\n";
        quiet_print "    $lines[$my_line_index]";

        # Look for "YYYY" or "YYYY-YYYY".  Requiring four-digit years
        # keeps comment leaders such as "--" and stray digits from being
        # mistaken for the year field.
        my ($first_year, $last_year) =
            $lines[$my_line_index] =~ m/\b(\d{4})\b(?:\s*-\s*(\d{4})\b)?/;
        die "Could not find years in copyright line! ($f)"
            if (!defined($first_year));

        # A single year is a range of one
        $last_year = $first_year
            if (!defined($last_year));

        # Sanity check
        die "Copyright looks like it extends before 1990...? ($f)"
            if ($first_year < 1990);
        die "Copyright in the future...? ($f)"
            if ($last_year > $year);

        # Do we need to do anything?
        if ($year > $last_year) {
            # The whole line is regenerated from the formal name.
            $lines[$my_line_index] = "${prefix}Copyright © $first_year-$year $my_formal_name\n";
            quiet_print "    Updated to:\n";
            quiet_print "    $lines[$my_line_index]";
        } else {
            quiet_print "    This year already included in copyright; not changing file\n";
            next;
        }
    }

    # If we got this far, the file needs a new copyright

    if ($CHECK_ONLY) {
        # intentional "loud" print to be more useful in a pre-commit hook
        print "==> '$f' has a stale/missing copyright\n";
        ++$would_replace;
        # Check-only mode must not touch the working tree at all.
        next;
    }

    # Write the new contents next to the original...
    my $newf = "$f.new-copyright";
    unlink($newf);
    open(my $out, '>', $newf) || die "Can't open file: $newf: $!";
    print {$out} @lines
        or die "Can't write file: $newf: $!";
    # Buffered write errors only surface at close time.
    close($out) || die "Can't write file: $newf: $!";

    # ...give it the permission bits of the old file (best effort)...
    my $st = stat($f);
    if (!defined($st) || !chmod(S_IMODE($st->mode), $newf)) {
        warn "==> WARNING: could not copy the mode of $f to $newf: $!\n";
    }

    # ...and replace the old one in a single step, so that the original
    # is never missing if something goes wrong.
    rename($newf, $f) || die "Can't rename $newf to $f: $!";
}
257
# In check-only mode, report stale or missing copyrights through the
# exit status.
exit(111) if ($CHECK_ONLY and $would_replace);
261
262#-------------------------------------------------------------------------------
263
# Returns a list of file names (relative to pwd) which git considers
# to be added or modified.  Takes no arguments.  Only regular files that
# exist in the work tree are returned; merge conflicts, deletions, and
# pure renames/copies are ignored.  Returns an empty list (after a
# warning) if git cannot be run successfully in the current directory.
sub find_modified_files {
    my @files = ();

    # Number of path entries to remove from ${top}-relative paths.
    # (--show-cdup either returns the empty string or sequence of "../"
    # entries, always ending in a "/")
    my $cdup = `git rev-parse --show-cdup`;
    if ($? != 0 || !defined($cdup)) {
        warn "==> WARNING: \"git rev-parse --show-cdup\" failed\n";
        return @files;
    }
    my $n_strip = () = $cdup =~ m!\.\./!g;

    # "." restricts scope, but does not get us relative path names
    my $cmd = "git status -z --porcelain --untracked-files=no .";
    quiet_print "==> Running: \"$cmd\"\n";
    my $output = `$cmd`;
    if ($? != 0 || !defined($output)) {
        warn "==> WARNING: \"$cmd\" failed\n";
        return @files;
    }

    # From git-status(1):
    # X          Y     Meaning
    # -------------------------------------------------
    #           [MD]   not updated
    # M        [ MD]   updated in index
    # A        [ MD]   added to index
    # D         [ M]   deleted from index
    # R        [ MD]   renamed in index
    # C        [ MD]   copied in index
    # [MARC]           index and work tree matches
    # [ MARC]     M    work tree changed since index
    # [ MARC]     D    deleted in work tree
    # -------------------------------------------------
    # D           D    unmerged, both deleted
    # A           U    unmerged, added by us
    # U           D    unmerged, deleted by them
    # U           A    unmerged, added by them
    # D           U    unmerged, deleted by us
    # A           A    unmerged, both added
    # U           U    unmerged, both modified
    # -------------------------------------------------
    # ?           ?    untracked
    # -------------------------------------------------
    #
    # With -z, entries are NUL-terminated and paths are not quoted.
    my @entries = split(/\x{00}/, $output);
    while (@entries) {
        my $entry = shift(@entries);
        my ($s1, $s2, $fullname) = $entry =~ m/^(.)(.) (.*)$/s;

        # skip anything that is not a "XY path" status entry
        next if (!defined($fullname));

        # With -z, a renamed/copied entry is followed by one extra
        # field holding the source path; consume it so that it is not
        # parsed as a status entry of its own.
        shift(@entries)
            if ($s1 eq "R" or $s1 eq "C" or $s2 eq "R" or $s2 eq "C");

        # ignore all merge cases
        next if ($s1 eq "D" and $s2 eq "D");
        next if ($s1 eq "A" and $s2 eq "A");
        next if ($s1 eq "U" or $s2 eq "U");

        # only update for actually added/modified cases, no copies,
        # renames, etc.
        next unless ($s1 eq "M" or $s2 eq "M" or $s1 eq "A");

        # git reports paths relative to the top of the work tree; drop
        # the leading components to make them relative to pwd
        my $relname = $fullname;
        $relname =~ s!^(?:[^/]*/){$n_strip}!!;

        push @files, $relname
            if (-f $relname);
    }

    return @files;
}
327