1#!/usr/bin/perl
2# -*- tab-width: 8; indent-tabs-mode: t; cperl-indent-level: 4 -*-
3# This script was originally based on the script of the same name from
4# the KDE SDK (by dfaure@kde.org)
5#
6# This version is
7#   Copyright (C) 2007, 2008 Adam D. Barratt
8#   Copyright (C) 2012 Francesco Poli
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License along
21# with this program. If not, see <https://www.gnu.org/licenses/>.
22
23# Originally copied from Debian's devscripts. A more modern version of
24# this can be found at
25# https://anonscm.debian.org/git/pkg-perl/packages/licensecheck.git/
26
27=head1 NAME
28
29licensecheck - simple license checker for source files
30
31=head1 SYNOPSIS
32
33B<licensecheck> B<--help>|B<--version>
34
35B<licensecheck> [B<--no-conf>] [B<--verbose>] [B<--copyright>]
36[B<-l>|B<--lines=>I<N>] [B<-i>|B<--ignore=>I<regex>] [B<-c>|B<--check=>I<regex>]
37[B<-m>|B<--machine>] [B<-r>|B<--recursive>]  [B<-e>|B<--encoding=>I<...>]
38I<list of files and directories to check>
39
40=head1 DESCRIPTION
41
42B<licensecheck> attempts to determine the license that applies to each file
43passed to it, by searching the start of the file for text belonging to
44various licenses.
45
46If any of the arguments passed are directories, B<licensecheck> will add
47the files contained within to the list of files to process.
48
49=head1 OPTIONS
50
51=over 4
52
53=item B<--verbose>, B<--no-verbose>
54
55Specify whether to output the text being processed from each file before
56the corresponding license information.
57
58Default is to be quiet.
59
60=item B<-l=>I<N>, B<--lines=>I<N>
61
62Specify the number of lines of each file's header which should be parsed
63for license information. (Default is 60).
64
65=item B<--tail=>I<N>
66
67By default, the last 5k bytes of each files are parsed to get license
68information. You may use this option to set the size of this parsed chunk.
69You may set this value to 0 to avoid parsing the end of the file.
70
71=item B<-i=>I<regex>, B<--ignore=>I<regex>
72
73When processing the list of files and directories, the regular
74expression specified by this option will be used to indicate those which
75should not be considered (e.g. backup files, VCS metadata).
76
77=item B<-r>, B<--recursive>
78
79Specify that the contents of directories should be added
80recursively.
81
82=item B<-c=>I<regex>, B<--check=>I<regex>
83
84Specify a pattern against which filenames will be matched in order to
85decide which files to check the license of.
86
87The default includes common source files.
88
89=item B<-s>, B<--skipped>
90
91Specify whether to show skipped files, i.e. files found which do not
92match the check regexp (see C<--check> option). Default is to not show
93skipped files.
94
95Note that ignored files (like C<.git> or C<.svn>) are not shown even when
96this option is used.
97
98=item B<--copyright>
99
100Also display copyright text found within the file
101
102=item B<-e> B<--encoding>
103
104Specifies input encoding of source files. By default, input files are
105not decoded. When encoding is specified, license and copyright
106information are printed on STDOUT as utf8, or garbage if you got the
107encoding wrong.
108
109=item B<-m>, B<--machine>
110
111Display the information in a machine readable way, i.e. in the form
112<file><tab><license>[<tab><copyright>] so that it can be easily sorted
113and/or filtered, e.g. with the B<awk> and B<sort> commands.
114Note that using the B<--verbose> option will kill the readability.
115
116=item B<--no-conf>, B<--noconf>
117
118Do not read any configuration files. This can only be used as the first
119option given on the command line.
120
121=back
122
123=head1 CONFIGURATION VARIABLES
124
125The two configuration files F</etc/devscripts.conf> and
126F<~/.devscripts> are sourced by a shell in that order to set
127configuration variables.  Command line options can be used to override
128configuration file settings.  Environment variable settings are
129ignored for this purpose.  The currently recognised variables are:
130
131=over 4
132
133=item B<LICENSECHECK_VERBOSE>
134
135If this is set to I<yes>, then it is the same as the B<--verbose> command
136line parameter being used. The default is I<no>.
137
138=item B<LICENSECHECK_PARSELINES>
139
140If this is set to a positive number then the specified number of lines
141at the start of each file will be read whilst attempting to determine
142the license(s) in use.  This is equivalent to the B<--lines> command line
143option.
144
145=back
146
147=head1 LICENSE
148
149This code is copyright by Adam D. Barratt <I<adam@adam-barratt.org.uk>>,
150all rights reserved; based on a script of the same name from the KDE
151SDK, which is copyright by <I<dfaure@kde.org>>.
152This program comes with ABSOLUTELY NO WARRANTY.
153You are free to redistribute this code under the terms of the GNU
154General Public License, version 2 or later.
155
156=head1 AUTHOR
157
158Adam D. Barratt <adam@adam-barratt.org.uk>
159
160=cut
161
162# see https://stackoverflow.com/questions/6162484/why-does-modern-perl-avoid-utf-8-by-default/6163129#6163129
163use v5.14;
164use utf8;
165
166use strict;
167use autodie;
168use warnings;
169use warnings    qw< FATAL  utf8     >;
170
171use Getopt::Long qw(:config gnu_getopt);
172use File::Basename;
173use File::stat;
174use IO::File;
175use Fcntl qw/:seek/;
176
177binmode STDOUT, ':utf8';
178
179my $progname = basename($0);
180
181# From dpkg-source
182my $default_ignore_regex = qr!
183# Ignore general backup files
184~$|
185# Ignore emacs recovery files
186(?:^|/)\.#|
187# Ignore vi swap files
188(?:^|/)\..*\.swp$|
189# Ignore baz-style junk files or directories
190(?:^|/),,.*(?:$|/.*$)|
191# File-names that should be ignored (never directories)
192(?:^|/)(?:DEADJOE|\.cvsignore|\.arch-inventory|\.bzrignore|\.gitignore)$|
193# File or directory names that should be ignored
194(?:^|/)(?:CVS|RCS|\.pc|\.deps|\{arch\}|\.arch-ids|\.svn|\.hg|_darcs|\.git|
195\.shelf|_MTN|\.bzr(?:\.backup|tags)?)(?:$|/.*$)
196!x;
197
198my $default_check_regex =
199		qr!
200		\.(                          # search for file suffix
201				c(c|pp|xx)?              # c and c++
202			 |h(h|pp|xx)?              # header files for c and c++
203			 |S
204			 |css|less                 # HTML css and similar
205			 |f(77|90)?
206			 |go
207			 |groovy
208			 |lisp
209			 |scala
210			 |clj
211			 |p(l|m)?6?|t|xs|pod6?     # perl5 or perl6
212			 |sh
213			 |php
214			 |py(|x)
215			 |rb
216			 |java
217			 |js
218			 |vala
219			 |el
220			 |sc(i|e)
221			 |cs
222			 |pas
223			 |inc
224			 |dtd|xsl
225			 |mod
226			 |m
227			 |md|markdown
228			 |tex
229			 |mli?
230			 |(c|l)?hs
231		 )
232		$
233	 !x;
234
235# also used to cleanup
236my $copyright_indicator_regex
237		= qr!
238				 (?:copyright	# The full word
239						|copr\.	# Legally-valid abbreviation
240						|\xc2\xa9	# Unicode copyright sign encoded in iso8859
241			|\x{00a9}	# Unicode character COPYRIGHT SIGN
242			#|©		# Unicode character COPYRIGHT SIGN
243						|\(c\)	# Legally-null representation of sign
244				 )
245				!lix;
246
247my $copyright_indicator_regex_with_capture = qr!$copyright_indicator_regex(?::\s*|\s+)(\S.*)$!lix;
248
249# avoid ditching things like <info@foo.com>
250my $copyright_disindicator_regex
251		= qr{
252			\b(?:info(?:rmation)?(?!@)	# Discussing copyright information
253						|(notice|statement|claim|string)s?	# Discussing the notice
254						|is|in|to        # Part of a sentence
255						|(holder|owner)s?       # Part of a sentence
256						|ownership              # Part of a sentence
257						)\b
258				}ix;
259
260my $copyright_predisindicator_regex
261		= qr!(
262						 ^[#]define\s+.*\(c\)    # #define foo(c) -- not copyright
263				 )!ix;
264
265my $modified_conf_msg;
266
267my %OPT=(
268		verbose        => '',
269		lines          => '',
270		noconf         => '',
271		ignore         => '',
272		check          => '',
273		recursive      => 0,
274		copyright      => 0,
275		machine        => 0,
276		text           => 0,
277		skipped        => 0,
278);
279
280my $def_lines = 60;
281my $def_tail = 5000; # roughly 60 lines of 80 chars
282
283# Read configuration files and then command line
284# This is boilerplate
285
286if (@ARGV and $ARGV[0] =~ /^--no-?conf$/) {
287		$modified_conf_msg = "  (no configuration files read)";
288		shift;
289} else {
290		my @config_files = ('/etc/devscripts.conf', '~/.devscripts');
291		my %config_vars = (
292					 'LICENSECHECK_VERBOSE' => 'no',
293					 'LICENSECHECK_PARSELINES' => $def_lines,
294					);
295		my %config_default = %config_vars;
296
297		my $shell_cmd;
298		# Set defaults
299		foreach my $var (keys %config_vars) {
300			$shell_cmd .= qq[$var="$config_vars{$var}";\n];
301		}
302		$shell_cmd .= 'for file in ' . join(" ", @config_files) . "; do\n";
303		$shell_cmd .= '[ -f $file ] && . $file; done;' . "\n";
304		# Read back values
305		foreach my $var (keys %config_vars) { $shell_cmd .= "echo \$$var;\n" }
306		my $shell_out = `/bin/bash -c '$shell_cmd'`;
307		@config_vars{keys %config_vars} = split /\n/, $shell_out, -1;
308
309		# Check validity
310		$config_vars{'LICENSECHECK_VERBOSE'} =~ /^(yes|no)$/
311			or $config_vars{'LICENSECHECK_VERBOSE'} = 'no';
312		$config_vars{'LICENSECHECK_PARSELINES'} =~ /^[1-9][0-9]*$/
313			or $config_vars{'LICENSECHECK_PARSELINES'} = $def_lines;
314
315		foreach my $var (sort keys %config_vars) {
316			if ($config_vars{$var} ne $config_default{$var}) {
317				$modified_conf_msg .= "  $var=$config_vars{$var}\n";
318			}
319		}
320		$modified_conf_msg ||= "  (none)\n";
321		chomp $modified_conf_msg;
322
323		$OPT{'verbose'} = $config_vars{'LICENSECHECK_VERBOSE'} eq 'yes' ? 1 : 0;
324		$OPT{'lines'} = $config_vars{'LICENSECHECK_PARSELINES'};
325}
326
327GetOptions(\%OPT,
328		"help|h",
329		"check|c=s",
330		"copyright",
331		"encoding|e=s",
332		"ignore|i=s",
333		"lines|l=i",
334		"machine|m",
335		"noconf|no-conf",
336		"recursive|r",
337		"skipped|s",
338		"tail",
339		"text|t",
340		"verbose!",
341		"version|v",
342) or die "Usage: $progname [options] filelist\nRun $progname --help for more details\n";
343
344$OPT{'lines'} = $def_lines if $OPT{'lines'} !~ /^[1-9][0-9]*$/;
345my $ignore_regex = length($OPT{ignore}) ? qr/$OPT{ignore}/ : $default_ignore_regex;
346
347my $check_regex = $default_check_regex;
348$check_regex = qr/$OPT{check}/ if length $OPT{check};
349
350if ($OPT{'noconf'}) {
351		fatal("--no-conf is only acceptable as the first command-line option!");
352}
353if ($OPT{'help'}) { help(); exit 0; }
354if ($OPT{'version'}) { version(); exit 0; }
355
356if ($OPT{text}) {
357		warn "$0 warning: option -text is deprecated\n"; # remove -text end 2015
358}
359
360die "Usage: $progname [options] filelist\nRun $progname --help for more details\n" unless @ARGV;
361
362$OPT{'lines'} = $def_lines if not defined $OPT{'lines'};
363
364my @files = ();
365my @find_args = ();
366my $files_count = @ARGV;
367
368push @find_args, qw(-maxdepth 1) unless $OPT{'recursive'};
369push @find_args, qw(-follow -type f -print);
370
371while (@ARGV) {
372		my $file = shift @ARGV;
373
374		if (-d $file) {
375			open my $FIND, '-|', 'find', $file, @find_args
376				or die "$progname: couldn't exec find: $!\n";
377
378			while (my $found = <$FIND>) {
379				chomp ($found);
380				# Silently skip empty files or ignored files
381				next if -z $found or $found =~ $ignore_regex;
382				if ( not $check_regex or $found =~ $check_regex ) {
383					# Silently skip empty files or ignored files
384					push @files, $found ;
385				}
386				else {
387					warn "skipped file $found\n" if $OPT{skipped};
388				}
389			}
390			close $FIND;
391		}
392		elsif ($file =~ $ignore_regex) {
393			# Silently skip ignored files
394			next;
395		}
396		elsif ( $files_count == 1 or not $check_regex or $file =~ $check_regex ) {
397			push @files, $file;
398		}
399		else {
400			warn "skipped file $file\n" if $OPT{skipped};
401		}
402}
403
404while (@files) {
405		my $file = shift @files;
406		my $content = '';
407		my $copyright_match;
408		my $copyright = '';
409
410		my $st = stat $file;
411
412		my $enc = $OPT{encoding} ;
413		my $mode = $enc ? "<:encoding($enc)" : '<';
414		# need to use "<" when encoding is unknown otherwise we break compatibility
415		my $fh = IO::File->new ($file ,$mode) or die "Unable to access $file\n";
416
417		while ( my $line = $fh->getline ) {
418			last if ($fh->input_line_number > $OPT{'lines'});
419			$content .= $line;
420		}
421
422		my %copyrights = extract_copyright($content);
423
424		print qq(----- $file header -----\n$content----- end header -----\n\n)
425		if $OPT{'verbose'};
426
427		my $license = parselicense(clean_cruft_and_spaces(clean_comments($content)));
428		$copyright = join(" / ", reverse sort values %copyrights);
429
430		if ( not $copyright and $license eq 'UNKNOWN') {
431			my $position = $fh->tell; # See IO::Seekable
432			my $tail_size = $OPT{tail} // $def_tail;
433			my $jump = $st->size - $tail_size;
434			$jump = $position if $jump < $position;
435
436			my $tail ;
437			if ( $tail_size and $jump < $st->size) {
438				$fh->seek($jump, SEEK_SET) ; # also IO::Seekable
439				$tail .= join('',$fh->getlines);
440			}
441
442			print qq(----- $file tail -----\n$tail----- end tail -----\n\n)
443			if $OPT{'verbose'};
444
445			%copyrights = extract_copyright($tail);
446			$license = parselicense(clean_cruft_and_spaces(clean_comments($tail)));
447			$copyright = join(" / ", reverse sort values %copyrights);
448		}
449
450		$fh->close;
451
452		if ($OPT{'machine'}) {
453			print "$file\t$license";
454			print "\t" . ($copyright or "*No copyright*") if $OPT{'copyright'};
455			print "\n";
456		} else {
457			print "$file: ";
458			print "*No copyright* " unless $copyright;
459			print $license . "\n";
460			print "  [Copyright: " . $copyright . "]\n"
461			if $copyright and $OPT{'copyright'};
462				print "\n" if $OPT{'copyright'};
463		}
464}
465
466sub extract_copyright {
467		my $content = shift;
468		my @c = split /\n/, clean_comments($content);
469
470		my %copyrights;
471		my $lines_after_copyright_block = 0;
472
473		my $in_copyright_block = 0;
474		while (@c) {
475		my $line = shift @c ;
476		my $copyright_match = parse_copyright($line, \$in_copyright_block) ;
477		if ($copyright_match) {
478			while (@c and $copyright_match =~ /\d[,.]?\s*$/) {
479				# looks like copyright end with a year, assume the owner is on next line(s)
480				$copyright_match .= ' '. shift @c;
481			}
482			$copyright_match =~ s/\s+/ /g;
483			$copyright_match =~ s/\s*$//;
484			$copyrights{lc("$copyright_match")} = "$copyright_match";
485		}
486		elsif (scalar keys %copyrights) {
487			# skip remaining lines if a copyright blocks was found more than 5 lines ago.
488			# so a copyright block may contain up to 5 blank lines, but no more
489			last if $lines_after_copyright_block++ > 5;
490		}
491	}
492	return %copyrights;
493}
494
495sub parse_copyright {
496		my $data = shift ;
497		my $in_copyright_block_ref = shift;
498		my $copyright = '';
499		my $match;
500
501		if ( $data !~ $copyright_predisindicator_regex) {
502		#print "match against ->$data<-\n";
503				if ($data =~ $copyright_indicator_regex_with_capture) {
504						$match = $1;
505						$$in_copyright_block_ref = 1;
506						# Ignore lines matching "see foo for copyright information" etc.
507						if ($match !~ $copyright_disindicator_regex) {
508								# De-cruft
509								$match =~ s/$copyright_indicator_regex//igx;
510								$match =~ s/^\s+//;
511								$match =~ s/\s*\bby\b\s*/ /;
512								$match =~ s/([,.])?\s*$//;
513								$match =~ s/\s{2,}/ /g;
514								$match =~ s/\\//g; # de-cruft nroff files
515								$match =~ s/\s*[*#]\s*$//;
516								$copyright = $match;
517						}
518				}
519				elsif ($$in_copyright_block_ref and $data =~ /^\d{2,}[,\s]+/) {
520						# following lines beginning with a year are supposed to be
521						# continued copyright blocks
522						$copyright = $data;
523				}
524				else {
525						$$in_copyright_block_ref = 0;
526				}
527		}
528
529		return $copyright;
530}
531
532sub clean_comments {
533		local $_ = shift or return q{};
534
535		# Remove generic comments: look for 4 or more lines beginning with
536		# regular comment pattern and trim it. Fall back to old algorithm
537		# if no such pattern found.
538		my @matches = m/^\s*((?:[^a-zA-Z0-9\s]{1,3}|\bREM\b))\s\w/mg;
539		if (@matches >= 4) {
540				my $comment_re = qr/\s*[\Q$matches[0]\E]{1,3}\s*/;
541				s/^$comment_re//mg;
542		}
543
544		# Remove Fortran comments
545		s/^[cC] //gm;
546
547		# Remove C / C++ comments
548		s#(\*/|/[/*])##g;
549
550		return $_;
551}
552
553sub clean_cruft_and_spaces {
554		local $_ = shift or return q{};
555
556		tr/\t\r\n/ /;
557
558		# this also removes quotes
559		tr% A-Za-z.+,@:;0-9\(\)/-%%cd;
560		tr/ //s;
561
562		return $_;
563}
564
565sub help {
566	 print <<"EOF";
567Usage: $progname [options] filename [filename ...]
568Valid options are:
569	 --help, -h             Display this message
570	 --version, -v          Display version and copyright info
571	 --no-conf, --noconf    Don't read devscripts config files; must be
572													the first option given
573	 --verbose              Display the header of each file before its
574														license information
575	 --skipped, -s          Show skipped files
576	 --lines, -l            Specify how many lines of the file header
577														should be parsed for license information
578														(Default: $def_lines)
579	 --tail                 Specify how many bytes to parse at end of file
580														(Default: $def_tail)
581	 --check, -c            Specify a pattern indicating which files should
582														 be checked
583														 (Default: '$default_check_regex')
584	 --machine, -m          Display in a machine readable way (good for awk)
585	 --recursive, -r        Add the contents of directories recursively
586	 --copyright            Also display the file's copyright
587	 --ignore, -i           Specify that files / directories matching the
588														regular expression should be ignored when
589														checking files
590														(Default: '$default_ignore_regex')
591
592Default settings modified by devscripts configuration files:
593$modified_conf_msg
594EOF
595}
596
597sub version {
598		print <<"EOF";
599This is $progname, from the Debian devscripts package, version 2.16.2
600Copyright (C) 2007, 2008 by Adam D. Barratt <adam\@adam-barratt.org.uk>; based
601on a script of the same name from the KDE SDK by <dfaure\@kde.org>.
602
603This program comes with ABSOLUTELY NO WARRANTY.
604You are free to redistribute this code under the terms of the
605GNU General Public License, version 2, or (at your option) any
606later version.
607EOF
608}
609
610sub parselicense {
611		my ($licensetext) = @_;
612
613		my $gplver = "";
614		my $extrainfo = "";
615		my $license = "";
616
617		if ($licensetext =~ /version ([^ ]+)(?: of the License)?,? or(?: \(at your option\))? version (\d(?:[.-]\d+)*)/) {
618			$gplver = " (v$1 or v$2)";
619		} elsif ($licensetext =~ /version ([^, ]+?)[.,]? (?:\(?only\)?.? )?(?:of the GNU (Affero )?(Lesser |Library )?General Public License )?(as )?published by the Free Software Foundation/i or
620			$licensetext =~ /GNU (?:Affero )?(?:Lesser |Library )?General Public License (?:as )?published by the Free Software Foundation[;,] version ([^, ]+?)[.,]? /i) {
621
622			$gplver = " (v$1)";
623		} elsif ($licensetext =~ /GNU (?:Affero )?(?:Lesser |Library )?General Public License\s*(?:[(),GPL]+)\s*version (\d+(?:\.\d+)?)[ \.]/i) {
624			$gplver = " (v$1)";
625		} elsif ($licensetext =~ /either version ([^ ]+)(?: of the License)?, or (?:\(at your option\) )?any later version/) {
626			$gplver = " (v$1 or later)";
627		} elsif ($licensetext =~ /GPL\sas\spublished\sby\sthe\sFree\sSoftware\sFoundation,\sversion\s([\d.]+)/i ) {
628			$gplver = " (v$1)";
629		} elsif ($licensetext =~ /SPDX-License-Identifier:\s+GPL-([1-9])\.0-or-later/i ){
630			$gplver = " (v$1 or later)";
631		} elsif ($licensetext =~ /SPDX-License-Identifier:\s+GPL-([1-9])\.0[^+]/i ) {
632			$gplver = " (v$1)";
633		} elsif ($licensetext =~ /SPDX-License-Identifier:\s+GPL-([1-9])\.0\+/i ) {
634			$gplver = " (v$1 or later)";
635		} elsif ($licensetext =~ /SPDX-License-Identifier:\s+LGPL-([1-9])\.[0-1]\-or-later/i ) {
636			$gplver = " (v$1 or later)";
637		}
638
639		if ($licensetext =~ /(?:675 Mass Ave|59 Temple Place|51 Franklin Steet|02139|02111-1307)/i) {
640			$extrainfo = " (with incorrect FSF address)$extrainfo";
641		}
642
643		if ($licensetext =~ /permission (?:is (also granted|given))? to link (the code of )?this program with (any edition of )?(Qt|the Qt library)/i) {
644			$extrainfo = " (with Qt exception)$extrainfo"
645		}
646
647		if ($licensetext =~ /As a special exception, you may create a larger work that contains part or all of the Bison parser skeleton and distribute that work under terms of your choice/) {
648			$extrainfo = " (with Bison parser exception)$extrainfo";
649		}
650
651		# exclude blurb found in boost license text
652		if ($licensetext =~ /(All changes made in this file will be lost|DO NOT (EDIT|delete this file)|Generated (automatically|by|from)|generated.*file)/i
653				and $licensetext !~  /unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor/) {
654			$license = "GENERATED FILE";
655		}
656
657		if ($licensetext =~ /(are made available|(is free software.? )?you can redistribute (it|them) and(?:\/|\s+)or modify (it|them)|is licensed) under the terms of (version [^ ]+ of )?the (GNU (Library |Lesser )General Public License|LGPL)/i) {
658			$license = "LGPL$gplver$extrainfo $license";
659		}
660		# For Perl modules handled by Dist::Zilla
661		elsif ($licensetext =~ /this is free software,? licensed under:? (?:the )?(?:GNU (?:Library |Lesser )General Public License|LGPL),? version ([\d\.]+)/i) {
662			$license = "LGPL (v$1) $license";
663		}
664
665		if ($licensetext =~ /is free software.? you can redistribute (it|them) and(?:\/|\s+)or modify (it|them) under the terms of the (GNU Affero General Public License|AGPL)/i) {
666			$license = "AGPL$gplver$extrainfo $license";
667		}
668
669		if ($licensetext =~ /(is free software.? )?you (can|may) redistribute (it|them) and(?:\/|\s+)or modify (it|them) under the terms of (?:version [^ ]+ (?:\(?only\)? )?of )?the GNU General Public License/i) {
670			$license = "GPL$gplver$extrainfo $license";
671		}
672
673		if ($licensetext =~ /is distributed under the terms of the GNU General Public License,/
674				and length $gplver) {
675			$license = "GPL$gplver$extrainfo $license";
676		}
677
678		if ($licensetext =~ /SPDX-License-Identifier:\s+GPL/i and length $gplver) {
679			$license = "GPL$gplver$extrainfo $license";
680		}
681
682		if ($licensetext =~ /SPDX-License-Identifier:\s+GPL-2.0-or-later/i and length $gplver) {
683			$license = "GPL$gplver$extrainfo";
684		}
685
686		if ($licensetext =~ /SPDX-License-Identifier:\s+LGPL/i and length $gplver) {
687			$license = "LGPL$gplver$extrainfo $license";
688		}
689
690		if ($licensetext =~ /SPDX-License-Identifier:\s+Zlib/i) {
691			$license = "zlib/libpng $license";
692		}
693
694		if ($licensetext =~ /SPDX-License-Identifier:\s+BSD-3-Clause/i) {
695			$license = 'BSD (3 clause)';
696		}
697
698		if ($licensetext =~ /SPDX-License-Identifier:\s+BSD-2-Clause/i) {
699			$license = 'BSD (2 clause)';
700		}
701
702		if ($licensetext =~ /SPDX-License-Identifier:\s+BSD-1-Clause/i) {
703			$license = 'BSD';
704		}
705
706		if ($licensetext =~ /SPDX-License-Identifier:\s+MIT/i) {
707			$license = 'MIT/X11 (BSD like)';
708		}
709
710		if ($licensetext =~ /SPDX-License-Identifier:\s+ISC/i) {
711			$license = 'ISC';
712		}
713
714		if ($licensetext =~ /(?:is|may be)\s(?:(?:distributed|used).*?terms|being\s+released).*?\b(L?GPL)\b/) {
715			my $v = $gplver || ' (unversioned/unknown version)';
716			$license = "$1$v $license";
717		}
718
719		if ($licensetext =~ /the rights to distribute and use this software as governed by the terms of the Lisp Lesser General Public License|\bLLGPL\b/ ) {
720			$license = "LLGPL $license";
721		}
722
723		if ($licensetext =~ /This file is part of the .*Qt GUI Toolkit. This file may be distributed under the terms of the Q Public License as defined/) {
724			$license = "QPL (part of Qt) $license";
725		} elsif ($licensetext =~ /may (be distributed|redistribute it) under the terms of the Q Public License/) {
726			$license = "QPL $license";
727		}
728
729		if ($licensetext =~ /opensource\.org\/licenses\/mit-license\.php/) {
730			$license = "MIT/X11 (BSD like) $license";
731		} elsif ($licensetext =~ /Permission is hereby granted, free of charge, to any person obtaining a copy of this software and(\/or)? associated documentation files \(the (Software|Materials)\), to deal in the (Software|Materials)/) {
732			$license = "MIT/X11 (BSD like) $license";
733		} elsif ($licensetext =~ /Permission is hereby granted, without written agreement and without license or royalty fees, to use, copy, modify, and distribute this software and its documentation for any purpose/) {
734			$license = "MIT/X11 (BSD like) $license";
735		}
736
737		if ($licensetext  =~ /Permission to use, copy, modify, and(\/or)? distribute this software for any purpose with or without fee is hereby granted, provided.*copyright notice.*permission notice.*all copies/) {
738			$license = "ISC $license";
739		}
740
741		if ($licensetext =~ /THIS SOFTWARE IS PROVIDED .*AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY/) {
742			if ($licensetext =~ /All advertising materials mentioning features or use of this software must display the following acknowledge?ment.*This product includes software developed by/i) {
743					$license = "BSD (4 clause) $license";
744			} elsif ($licensetext =~ /(The name(?:\(s\))? .*? may not|Neither the (names? .*?|authors?) nor the names of( (its|their|other|any))? contributors may) be used to endorse or promote products derived from this software/i) {
745					$license = "BSD (3 clause) $license";
746			} elsif ($licensetext =~ /Redistributions in binary form must reproduce the above copyright notice/i) {
747					$license = "BSD (2 clause) $license";
748			} else {
749					$license = "BSD $license";
750			}
751		}
752
753		if ($licensetext =~ /Mozilla Public License,? (?:(?:Version|v\.)\s+)?(\d+(?:\.\d+)?)/) {
754			$license = "MPL (v$1) $license";
755		}
756		elsif ($licensetext =~ /Mozilla Public License,? \((?:Version|v\.) (\d+(?:\.\d+)?)\)/) {
757			$license = "MPL (v$1) $license";
758		}
759
760		# match when either:
761		# - the text *begins* with "The Artistic license v2.0" which is (hopefully) the actual artistic license v2.0 text.
762		# - a license grant is found. i.e something like "this is free software, licensed under the artistic license v2.0"
763		if ($licensetext =~ /(?:^\s*|(?:This is free software, licensed|Released|be used|use and modify this (?:module|software)) under (?:the terms of )?)[Tt]he Artistic License ([v\d.]*\d)/) {
764			$license = "Artistic (v$1) $license";
765		}
766
767		if ($licensetext =~ /is free software under the Artistic [Ll]icense/) {
768			$license = "Artistic $license";
769		}
770
771		if ($licensetext =~ /This program is free software; you can redistribute it and\/or modify it under the same terms as Perl itself/) {
772			$license = "Perl $license";
773		}
774
775		if ($licensetext =~ /under the Apache License, Version ([^ ]+)/) {
776			$license = "Apache (v$1) $license";
777		}
778
779		if ($licensetext =~ /(THE BEER-WARE LICENSE)/i) {
780			$license = "Beerware $license";
781		}
782
783		if ($licensetext =~ /distributed under the terms of the FreeType project/i) {
784			$license = "FreeType $license"; # aka FTL see https://www.freetype.org/license.html
785		}
786
787		if ($licensetext =~ /This source file is subject to version ([^ ]+) of the PHP license/) {
788			$license = "PHP (v$1) $license";
789		}
790
791		if ($licensetext =~ /under the terms of the CeCILL /) {
792			$license = "CeCILL $license";
793		}
794
795		if ($licensetext =~ /under the terms of the CeCILL-([^ ]+) /) {
796			$license = "CeCILL-$1 $license";
797		}
798
799		if ($licensetext =~ /under the SGI Free Software License B/) {
800			$license = "SGI Free Software License B $license";
801		}
802
803		if ($licensetext =~ /is in the public domain/i) {
804			$license = "Public domain $license";
805		}
806
807		if ($licensetext =~ /terms of the Common Development and Distribution License(, Version ([^(]+))? \(the License\)/) {
808			$license = "CDDL " . ($1 ? "(v$2) " : '') . $license;
809		}
810
811		if ($licensetext =~ /Microsoft Permissive License \(Ms-PL\)/) {
812			$license = "Ms-PL $license";
813		}
814
815		if ($licensetext =~ /Licensed under the Academic Free License version ([\d.]+)/) {
816			$license = $1 ? "AFL-$1" : "AFL";
817		}
818
819		if ($licensetext =~ /This program and the accompanying materials are made available under the terms of the Eclipse Public License v?([\d.]+)/) {
820			$license = $1 ? "EPL-$1" : "EPL";
821		}
822
823		# quotes were removed by clean_comments function
824		if ($licensetext =~ /Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license \(the Software\)/ or
825			$licensetext =~ /Boost Software License([ ,-]+Version ([^ ]+)?(\.))/i) {
826			$license = "BSL " . ($1 ? "(v$2) " : '') . $license;
827		}
828
829		if ($licensetext =~ /PYTHON SOFTWARE FOUNDATION LICENSE (VERSION ([^ ]+))/i) {
830			$license = "PSF " . ($1 ? "(v$2) " : '') . $license;
831		}
832
833		if ($licensetext =~ /The origin of this software must not be misrepresented.*Altered source versions must be plainly marked as such.*This notice may not be removed or altered from any source distribution/ or
834				$licensetext =~ /see copyright notice in zlib\.h/) {
835			$license = "zlib/libpng $license";
836		} elsif ($licensetext =~ /This code is released under the libpng license/) {
837			$license = "libpng $license";
838		}
839
840		if ($licensetext =~ /Do What The Fuck You Want To Public License, Version ([^, ]+)/i) {
841			$license = "WTFPL (v$1) $license";
842		}
843
844		if ($licensetext =~ /Do what The Fuck You Want To Public License/i) {
845			$license = "WTFPL $license";
846		}
847
848		if ($licensetext =~ /(License WTFPL|Under (the|a) WTFPL)/i) {
849			$license = "WTFPL $license";
850		}
851
852		if ($licensetext =~ /SPDX-License-Identifier:\s+\(([a-zA-Z0-9-\.]+)\s+OR\s+([a-zA-Z0-9-\.]+)\)/i) {
853			my $license1 = $1;
854			my $license2 = $2;
855			$license = parselicense("SPDX-License-Identifier: $license1") . ";" . parselicense("SPDX-License-Identifier: $license2");
856		}
857
858		$license = "UNKNOWN" if (!length($license));
859
860		# Remove trailing spaces.
861		$license =~ s/\s+$//;
862
863		return $license;
864}
865
866sub fatal {
867		my ($pack,$file,$line);
868		($pack,$file,$line) = caller();
869		(my $msg = "$progname: fatal error at line $line:\n@_\n") =~ tr/\0//d;
870		$msg =~ s/\n\n$/\n/;
871		die $msg;
872}
873