1#!/usr/bin/perl 2# -*- tab-width: 8; indent-tabs-mode: t; cperl-indent-level: 4 -*- 3# This script was originally based on the script of the same name from 4# the KDE SDK (by dfaure@kde.org) 5# 6# This version is 7# Copyright (C) 2007, 2008 Adam D. Barratt 8# Copyright (C) 2012 Francesco Poli 9# 10# This program is free software; you can redistribute it and/or modify 11# it under the terms of the GNU General Public License as published by 12# the Free Software Foundation; either version 2 of the License, or 13# (at your option) any later version. 14# 15# This program is distributed in the hope that it will be useful, 16# but WITHOUT ANY WARRANTY; without even the implied warranty of 17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18# GNU General Public License for more details. 19# 20# You should have received a copy of the GNU General Public License along 21# with this program. If not, see <https://www.gnu.org/licenses/>. 22 23# Originally copied from Debian's devscripts. A more modern version of 24# this can be found at 25# https://anonscm.debian.org/git/pkg-perl/packages/licensecheck.git/ 26 27=head1 NAME 28 29licensecheck - simple license checker for source files 30 31=head1 SYNOPSIS 32 33B<licensecheck> B<--help>|B<--version> 34 35B<licensecheck> [B<--no-conf>] [B<--verbose>] [B<--copyright>] 36[B<-l>|B<--lines=>I<N>] [B<-i>|B<--ignore=>I<regex>] [B<-c>|B<--check=>I<regex>] 37[B<-m>|B<--machine>] [B<-r>|B<--recursive>] [B<-e>|B<--encoding=>I<...>] 38I<list of files and directories to check> 39 40=head1 DESCRIPTION 41 42B<licensecheck> attempts to determine the license that applies to each file 43passed to it, by searching the start of the file for text belonging to 44various licenses. 45 46If any of the arguments passed are directories, B<licensecheck> will add 47the files contained within to the list of files to process. 48 49=head1 OPTIONS 50 51=over 4 52 53=item B<--verbose>, B<--no-verbose> 54 55Specify whether to output the text being processed from each file before 56the corresponding license information. 57 58Default is to be quiet. 59 60=item B<-l=>I<N>, B<--lines=>I<N> 61 62Specify the number of lines of each file's header which should be parsed 63for license information. (Default is 60). 64 65=item B<--tail=>I<N> 66 67By default, the last 5k bytes of each files are parsed to get license 68information. You may use this option to set the size of this parsed chunk. 69You may set this value to 0 to avoid parsing the end of the file. 70 71=item B<-i=>I<regex>, B<--ignore=>I<regex> 72 73When processing the list of files and directories, the regular 74expression specified by this option will be used to indicate those which 75should not be considered (e.g. backup files, VCS metadata). 76 77=item B<-r>, B<--recursive> 78 79Specify that the contents of directories should be added 80recursively. 81 82=item B<-c=>I<regex>, B<--check=>I<regex> 83 84Specify a pattern against which filenames will be matched in order to 85decide which files to check the license of. 86 87The default includes common source files. 88 89=item B<-s>, B<--skipped> 90 91Specify whether to show skipped files, i.e. files found which do not 92match the check regexp (see C<--check> option). Default is to not show 93skipped files. 94 95Note that ignored files (like C<.git> or C<.svn>) are not shown even when 96this option is used. 97 98=item B<--copyright> 99 100Also display copyright text found within the file 101 102=item B<-e> B<--encoding> 103 104Specifies input encoding of source files. By default, input files are 105not decoded. When encoding is specified, license and copyright 106information are printed on STDOUT as utf8, or garbage if you got the 107encoding wrong. 108 109=item B<-m>, B<--machine> 110 111Display the information in a machine readable way, i.e. in the form 112<file><tab><license>[<tab><copyright>] so that it can be easily sorted 113and/or filtered, e.g. with the B<awk> and B<sort> commands. 114Note that using the B<--verbose> option will kill the readability. 115 116=item B<--no-conf>, B<--noconf> 117 118Do not read any configuration files. This can only be used as the first 119option given on the command line. 120 121=back 122 123=head1 CONFIGURATION VARIABLES 124 125The two configuration files F</etc/devscripts.conf> and 126F<~/.devscripts> are sourced by a shell in that order to set 127configuration variables. Command line options can be used to override 128configuration file settings. Environment variable settings are 129ignored for this purpose. The currently recognised variables are: 130 131=over 4 132 133=item B<LICENSECHECK_VERBOSE> 134 135If this is set to I<yes>, then it is the same as the B<--verbose> command 136line parameter being used. The default is I<no>. 137 138=item B<LICENSECHECK_PARSELINES> 139 140If this is set to a positive number then the specified number of lines 141at the start of each file will be read whilst attempting to determine 142the license(s) in use. This is equivalent to the B<--lines> command line 143option. 144 145=back 146 147=head1 LICENSE 148 149This code is copyright by Adam D. Barratt <I<adam@adam-barratt.org.uk>>, 150all rights reserved; based on a script of the same name from the KDE 151SDK, which is copyright by <I<dfaure@kde.org>>. 152This program comes with ABSOLUTELY NO WARRANTY. 153You are free to redistribute this code under the terms of the GNU 154General Public License, version 2 or later. 155 156=head1 AUTHOR 157 158Adam D. Barratt <adam@adam-barratt.org.uk> 159 160=cut 161 162# see https://stackoverflow.com/questions/6162484/why-does-modern-perl-avoid-utf-8-by-default/6163129#6163129 163use v5.14; 164use utf8; 165 166use strict; 167use autodie; 168use warnings; 169use warnings qw< FATAL utf8 >; 170 171use Getopt::Long qw(:config gnu_getopt); 172use File::Basename; 173use File::stat; 174use IO::File; 175use Fcntl qw/:seek/; 176 177binmode STDOUT, ':utf8'; 178 179my $progname = basename($0); 180 181# From dpkg-source 182my $default_ignore_regex = qr! 183# Ignore general backup files 184~$| 185# Ignore emacs recovery files 186(?:^|/)\.#| 187# Ignore vi swap files 188(?:^|/)\..*\.swp$| 189# Ignore baz-style junk files or directories 190(?:^|/),,.*(?:$|/.*$)| 191# File-names that should be ignored (never directories) 192(?:^|/)(?:DEADJOE|\.cvsignore|\.arch-inventory|\.bzrignore|\.gitignore)$| 193# File or directory names that should be ignored 194(?:^|/)(?:CVS|RCS|\.pc|\.deps|\{arch\}|\.arch-ids|\.svn|\.hg|_darcs|\.git| 195\.shelf|_MTN|\.bzr(?:\.backup|tags)?)(?:$|/.*$) 196!x; 197 198my $default_check_regex = 199 qr! 200 \.( # search for file suffix 201 c(c|pp|xx)? # c and c++ 202 |h(h|pp|xx)? # header files for c and c++ 203 |S 204 |css|less # HTML css and similar 205 |f(77|90)? 206 |go 207 |groovy 208 |lisp 209 |scala 210 |clj 211 |p(l|m)?6?|t|xs|pod6? # perl5 or perl6 212 |sh 213 |php 214 |py(|x) 215 |rb 216 |java 217 |js 218 |vala 219 |el 220 |sc(i|e) 221 |cs 222 |pas 223 |inc 224 |dtd|xsl 225 |mod 226 |m 227 |md|markdown 228 |tex 229 |mli? 230 |(c|l)?hs 231 ) 232 $ 233 !x; 234 235# also used to cleanup 236my $copyright_indicator_regex 237 = qr! 238 (?:copyright # The full word 239 |copr\. # Legally-valid abbreviation 240 |\xc2\xa9 # Unicode copyright sign encoded in iso8859 241 |\x{00a9} # Unicode character COPYRIGHT SIGN 242 #|© # Unicode character COPYRIGHT SIGN 243 |\(c\) # Legally-null representation of sign 244 ) 245 !lix; 246 247my $copyright_indicator_regex_with_capture = qr!$copyright_indicator_regex(?::\s*|\s+)(\S.*)$!lix; 248 249# avoid ditching things like <info@foo.com> 250my $copyright_disindicator_regex 251 = qr{ 252 \b(?:info(?:rmation)?(?!@) # Discussing copyright information 253 |(notice|statement|claim|string)s? # Discussing the notice 254 |is|in|to # Part of a sentence 255 |(holder|owner)s? # Part of a sentence 256 |ownership # Part of a sentence 257 )\b 258 }ix; 259 260my $copyright_predisindicator_regex 261 = qr!( 262 ^[#]define\s+.*\(c\) # #define foo(c) -- not copyright 263 )!ix; 264 265my $modified_conf_msg; 266 267my %OPT=( 268 verbose => '', 269 lines => '', 270 noconf => '', 271 ignore => '', 272 check => '', 273 recursive => 0, 274 copyright => 0, 275 machine => 0, 276 text => 0, 277 skipped => 0, 278); 279 280my $def_lines = 60; 281my $def_tail = 5000; # roughly 60 lines of 80 chars 282 283# Read configuration files and then command line 284# This is boilerplate 285 286if (@ARGV and $ARGV[0] =~ /^--no-?conf$/) { 287 $modified_conf_msg = " (no configuration files read)"; 288 shift; 289} else { 290 my @config_files = ('/etc/devscripts.conf', '~/.devscripts'); 291 my %config_vars = ( 292 'LICENSECHECK_VERBOSE' => 'no', 293 'LICENSECHECK_PARSELINES' => $def_lines, 294 ); 295 my %config_default = %config_vars; 296 297 my $shell_cmd; 298 # Set defaults 299 foreach my $var (keys %config_vars) { 300 $shell_cmd .= qq[$var="$config_vars{$var}";\n]; 301 } 302 $shell_cmd .= 'for file in ' . join(" ", @config_files) . "; do\n"; 303 $shell_cmd .= '[ -f $file ] && . $file; done;' . "\n"; 304 # Read back values 305 foreach my $var (keys %config_vars) { $shell_cmd .= "echo \$$var;\n" } 306 my $shell_out = `/bin/bash -c '$shell_cmd'`; 307 @config_vars{keys %config_vars} = split /\n/, $shell_out, -1; 308 309 # Check validity 310 $config_vars{'LICENSECHECK_VERBOSE'} =~ /^(yes|no)$/ 311 or $config_vars{'LICENSECHECK_VERBOSE'} = 'no'; 312 $config_vars{'LICENSECHECK_PARSELINES'} =~ /^[1-9][0-9]*$/ 313 or $config_vars{'LICENSECHECK_PARSELINES'} = $def_lines; 314 315 foreach my $var (sort keys %config_vars) { 316 if ($config_vars{$var} ne $config_default{$var}) { 317 $modified_conf_msg .= " $var=$config_vars{$var}\n"; 318 } 319 } 320 $modified_conf_msg ||= " (none)\n"; 321 chomp $modified_conf_msg; 322 323 $OPT{'verbose'} = $config_vars{'LICENSECHECK_VERBOSE'} eq 'yes' ? 1 : 0; 324 $OPT{'lines'} = $config_vars{'LICENSECHECK_PARSELINES'}; 325} 326 327GetOptions(\%OPT, 328 "help|h", 329 "check|c=s", 330 "copyright", 331 "encoding|e=s", 332 "ignore|i=s", 333 "lines|l=i", 334 "machine|m", 335 "noconf|no-conf", 336 "recursive|r", 337 "skipped|s", 338 "tail", 339 "text|t", 340 "verbose!", 341 "version|v", 342) or die "Usage: $progname [options] filelist\nRun $progname --help for more details\n"; 343 344$OPT{'lines'} = $def_lines if $OPT{'lines'} !~ /^[1-9][0-9]*$/; 345my $ignore_regex = length($OPT{ignore}) ? qr/$OPT{ignore}/ : $default_ignore_regex; 346 347my $check_regex = $default_check_regex; 348$check_regex = qr/$OPT{check}/ if length $OPT{check}; 349 350if ($OPT{'noconf'}) { 351 fatal("--no-conf is only acceptable as the first command-line option!"); 352} 353if ($OPT{'help'}) { help(); exit 0; } 354if ($OPT{'version'}) { version(); exit 0; } 355 356if ($OPT{text}) { 357 warn "$0 warning: option -text is deprecated\n"; # remove -text end 2015 358} 359 360die "Usage: $progname [options] filelist\nRun $progname --help for more details\n" unless @ARGV; 361 362$OPT{'lines'} = $def_lines if not defined $OPT{'lines'}; 363 364my @files = (); 365my @find_args = (); 366my $files_count = @ARGV; 367 368push @find_args, qw(-maxdepth 1) unless $OPT{'recursive'}; 369push @find_args, qw(-follow -type f -print); 370 371while (@ARGV) { 372 my $file = shift @ARGV; 373 374 if (-d $file) { 375 open my $FIND, '-|', 'find', $file, @find_args 376 or die "$progname: couldn't exec find: $!\n"; 377 378 while (my $found = <$FIND>) { 379 chomp ($found); 380 # Silently skip empty files or ignored files 381 next if -z $found or $found =~ $ignore_regex; 382 if ( not $check_regex or $found =~ $check_regex ) { 383 # Silently skip empty files or ignored files 384 push @files, $found ; 385 } 386 else { 387 warn "skipped file $found\n" if $OPT{skipped}; 388 } 389 } 390 close $FIND; 391 } 392 elsif ($file =~ $ignore_regex) { 393 # Silently skip ignored files 394 next; 395 } 396 elsif ( $files_count == 1 or not $check_regex or $file =~ $check_regex ) { 397 push @files, $file; 398 } 399 else { 400 warn "skipped file $file\n" if $OPT{skipped}; 401 } 402} 403 404while (@files) { 405 my $file = shift @files; 406 my $content = ''; 407 my $copyright_match; 408 my $copyright = ''; 409 410 my $st = stat $file; 411 412 my $enc = $OPT{encoding} ; 413 my $mode = $enc ? "<:encoding($enc)" : '<'; 414 # need to use "<" when encoding is unknown otherwise we break compatibility 415 my $fh = IO::File->new ($file ,$mode) or die "Unable to access $file\n"; 416 417 while ( my $line = $fh->getline ) { 418 last if ($fh->input_line_number > $OPT{'lines'}); 419 $content .= $line; 420 } 421 422 my %copyrights = extract_copyright($content); 423 424 print qq(----- $file header -----\n$content----- end header -----\n\n) 425 if $OPT{'verbose'}; 426 427 my $license = parselicense(clean_cruft_and_spaces(clean_comments($content))); 428 $copyright = join(" / ", reverse sort values %copyrights); 429 430 if ( not $copyright and $license eq 'UNKNOWN') { 431 my $position = $fh->tell; # See IO::Seekable 432 my $tail_size = $OPT{tail} // $def_tail; 433 my $jump = $st->size - $tail_size; 434 $jump = $position if $jump < $position; 435 436 my $tail ; 437 if ( $tail_size and $jump < $st->size) { 438 $fh->seek($jump, SEEK_SET) ; # also IO::Seekable 439 $tail .= join('',$fh->getlines); 440 } 441 442 print qq(----- $file tail -----\n$tail----- end tail -----\n\n) 443 if $OPT{'verbose'}; 444 445 %copyrights = extract_copyright($tail); 446 $license = parselicense(clean_cruft_and_spaces(clean_comments($tail))); 447 $copyright = join(" / ", reverse sort values %copyrights); 448 } 449 450 $fh->close; 451 452 if ($OPT{'machine'}) { 453 print "$file\t$license"; 454 print "\t" . ($copyright or "*No copyright*") if $OPT{'copyright'}; 455 print "\n"; 456 } else { 457 print "$file: "; 458 print "*No copyright* " unless $copyright; 459 print $license . "\n"; 460 print " [Copyright: " . $copyright . "]\n" 461 if $copyright and $OPT{'copyright'}; 462 print "\n" if $OPT{'copyright'}; 463 } 464} 465 466sub extract_copyright { 467 my $content = shift; 468 my @c = split /\n/, clean_comments($content); 469 470 my %copyrights; 471 my $lines_after_copyright_block = 0; 472 473 my $in_copyright_block = 0; 474 while (@c) { 475 my $line = shift @c ; 476 my $copyright_match = parse_copyright($line, \$in_copyright_block) ; 477 if ($copyright_match) { 478 while (@c and $copyright_match =~ /\d[,.]?\s*$/) { 479 # looks like copyright end with a year, assume the owner is on next line(s) 480 $copyright_match .= ' '. shift @c; 481 } 482 $copyright_match =~ s/\s+/ /g; 483 $copyright_match =~ s/\s*$//; 484 $copyrights{lc("$copyright_match")} = "$copyright_match"; 485 } 486 elsif (scalar keys %copyrights) { 487 # skip remaining lines if a copyright blocks was found more than 5 lines ago. 488 # so a copyright block may contain up to 5 blank lines, but no more 489 last if $lines_after_copyright_block++ > 5; 490 } 491 } 492 return %copyrights; 493} 494 495sub parse_copyright { 496 my $data = shift ; 497 my $in_copyright_block_ref = shift; 498 my $copyright = ''; 499 my $match; 500 501 if ( $data !~ $copyright_predisindicator_regex) { 502 #print "match against ->$data<-\n"; 503 if ($data =~ $copyright_indicator_regex_with_capture) { 504 $match = $1; 505 $$in_copyright_block_ref = 1; 506 # Ignore lines matching "see foo for copyright information" etc. 507 if ($match !~ $copyright_disindicator_regex) { 508 # De-cruft 509 $match =~ s/$copyright_indicator_regex//igx; 510 $match =~ s/^\s+//; 511 $match =~ s/\s*\bby\b\s*/ /; 512 $match =~ s/([,.])?\s*$//; 513 $match =~ s/\s{2,}/ /g; 514 $match =~ s/\\//g; # de-cruft nroff files 515 $match =~ s/\s*[*#]\s*$//; 516 $copyright = $match; 517 } 518 } 519 elsif ($$in_copyright_block_ref and $data =~ /^\d{2,}[,\s]+/) { 520 # following lines beginning with a year are supposed to be 521 # continued copyright blocks 522 $copyright = $data; 523 } 524 else { 525 $$in_copyright_block_ref = 0; 526 } 527 } 528 529 return $copyright; 530} 531 532sub clean_comments { 533 local $_ = shift or return q{}; 534 535 # Remove generic comments: look for 4 or more lines beginning with 536 # regular comment pattern and trim it. Fall back to old algorithm 537 # if no such pattern found. 538 my @matches = m/^\s*((?:[^a-zA-Z0-9\s]{1,3}|\bREM\b))\s\w/mg; 539 if (@matches >= 4) { 540 my $comment_re = qr/\s*[\Q$matches[0]\E]{1,3}\s*/; 541 s/^$comment_re//mg; 542 } 543 544 # Remove Fortran comments 545 s/^[cC] //gm; 546 547 # Remove C / C++ comments 548 s#(\*/|/[/*])##g; 549 550 return $_; 551} 552 553sub clean_cruft_and_spaces { 554 local $_ = shift or return q{}; 555 556 tr/\t\r\n/ /; 557 558 # this also removes quotes 559 tr% A-Za-z.+,@:;0-9\(\)/-%%cd; 560 tr/ //s; 561 562 return $_; 563} 564 565sub help { 566 print <<"EOF"; 567Usage: $progname [options] filename [filename ...] 568Valid options are: 569 --help, -h Display this message 570 --version, -v Display version and copyright info 571 --no-conf, --noconf Don't read devscripts config files; must be 572 the first option given 573 --verbose Display the header of each file before its 574 license information 575 --skipped, -s Show skipped files 576 --lines, -l Specify how many lines of the file header 577 should be parsed for license information 578 (Default: $def_lines) 579 --tail Specify how many bytes to parse at end of file 580 (Default: $def_tail) 581 --check, -c Specify a pattern indicating which files should 582 be checked 583 (Default: '$default_check_regex') 584 --machine, -m Display in a machine readable way (good for awk) 585 --recursive, -r Add the contents of directories recursively 586 --copyright Also display the file's copyright 587 --ignore, -i Specify that files / directories matching the 588 regular expression should be ignored when 589 checking files 590 (Default: '$default_ignore_regex') 591 592Default settings modified by devscripts configuration files: 593$modified_conf_msg 594EOF 595} 596 597sub version { 598 print <<"EOF"; 599This is $progname, from the Debian devscripts package, version 2.16.2 600Copyright (C) 2007, 2008 by Adam D. Barratt <adam\@adam-barratt.org.uk>; based 601on a script of the same name from the KDE SDK by <dfaure\@kde.org>. 602 603This program comes with ABSOLUTELY NO WARRANTY. 604You are free to redistribute this code under the terms of the 605GNU General Public License, version 2, or (at your option) any 606later version. 607EOF 608} 609 610sub parselicense { 611 my ($licensetext) = @_; 612 613 my $gplver = ""; 614 my $extrainfo = ""; 615 my $license = ""; 616 617 if ($licensetext =~ /version ([^ ]+)(?: of the License)?,? or(?: \(at your option\))? version (\d(?:[.-]\d+)*)/) { 618 $gplver = " (v$1 or v$2)"; 619 } elsif ($licensetext =~ /version ([^, ]+?)[.,]? (?:\(?only\)?.? )?(?:of the GNU (Affero )?(Lesser |Library )?General Public License )?(as )?published by the Free Software Foundation/i or 620 $licensetext =~ /GNU (?:Affero )?(?:Lesser |Library )?General Public License (?:as )?published by the Free Software Foundation[;,] version ([^, ]+?)[.,]? /i) { 621 622 $gplver = " (v$1)"; 623 } elsif ($licensetext =~ /GNU (?:Affero )?(?:Lesser |Library )?General Public License\s*(?:[(),GPL]+)\s*version (\d+(?:\.\d+)?)[ \.]/i) { 624 $gplver = " (v$1)"; 625 } elsif ($licensetext =~ /either version ([^ ]+)(?: of the License)?, or (?:\(at your option\) )?any later version/) { 626 $gplver = " (v$1 or later)"; 627 } elsif ($licensetext =~ /GPL\sas\spublished\sby\sthe\sFree\sSoftware\sFoundation,\sversion\s([\d.]+)/i ) { 628 $gplver = " (v$1)"; 629 } elsif ($licensetext =~ /SPDX-License-Identifier:\s+GPL-([1-9])\.0-or-later/i ){ 630 $gplver = " (v$1 or later)"; 631 } elsif ($licensetext =~ /SPDX-License-Identifier:\s+GPL-([1-9])\.0[^+]/i ) { 632 $gplver = " (v$1)"; 633 } elsif ($licensetext =~ /SPDX-License-Identifier:\s+GPL-([1-9])\.0\+/i ) { 634 $gplver = " (v$1 or later)"; 635 } elsif ($licensetext =~ /SPDX-License-Identifier:\s+LGPL-([1-9])\.[0-1]\-or-later/i ) { 636 $gplver = " (v$1 or later)"; 637 } 638 639 if ($licensetext =~ /(?:675 Mass Ave|59 Temple Place|51 Franklin Steet|02139|02111-1307)/i) { 640 $extrainfo = " (with incorrect FSF address)$extrainfo"; 641 } 642 643 if ($licensetext =~ /permission (?:is (also granted|given))? to link (the code of )?this program with (any edition of )?(Qt|the Qt library)/i) { 644 $extrainfo = " (with Qt exception)$extrainfo" 645 } 646 647 if ($licensetext =~ /As a special exception, you may create a larger work that contains part or all of the Bison parser skeleton and distribute that work under terms of your choice/) { 648 $extrainfo = " (with Bison parser exception)$extrainfo"; 649 } 650 651 # exclude blurb found in boost license text 652 if ($licensetext =~ /(All changes made in this file will be lost|DO NOT (EDIT|delete this file)|Generated (automatically|by|from)|generated.*file)/i 653 and $licensetext !~ /unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor/) { 654 $license = "GENERATED FILE"; 655 } 656 657 if ($licensetext =~ /(are made available|(is free software.? )?you can redistribute (it|them) and(?:\/|\s+)or modify (it|them)|is licensed) under the terms of (version [^ ]+ of )?the (GNU (Library |Lesser )General Public License|LGPL)/i) { 658 $license = "LGPL$gplver$extrainfo $license"; 659 } 660 # For Perl modules handled by Dist::Zilla 661 elsif ($licensetext =~ /this is free software,? licensed under:? (?:the )?(?:GNU (?:Library |Lesser )General Public License|LGPL),? version ([\d\.]+)/i) { 662 $license = "LGPL (v$1) $license"; 663 } 664 665 if ($licensetext =~ /is free software.? you can redistribute (it|them) and(?:\/|\s+)or modify (it|them) under the terms of the (GNU Affero General Public License|AGPL)/i) { 666 $license = "AGPL$gplver$extrainfo $license"; 667 } 668 669 if ($licensetext =~ /(is free software.? )?you (can|may) redistribute (it|them) and(?:\/|\s+)or modify (it|them) under the terms of (?:version [^ ]+ (?:\(?only\)? )?of )?the GNU General Public License/i) { 670 $license = "GPL$gplver$extrainfo $license"; 671 } 672 673 if ($licensetext =~ /is distributed under the terms of the GNU General Public License,/ 674 and length $gplver) { 675 $license = "GPL$gplver$extrainfo $license"; 676 } 677 678 if ($licensetext =~ /SPDX-License-Identifier:\s+GPL/i and length $gplver) { 679 $license = "GPL$gplver$extrainfo $license"; 680 } 681 682 if ($licensetext =~ /SPDX-License-Identifier:\s+GPL-2.0-or-later/i and length $gplver) { 683 $license = "GPL$gplver$extrainfo"; 684 } 685 686 if ($licensetext =~ /SPDX-License-Identifier:\s+LGPL/i and length $gplver) { 687 $license = "LGPL$gplver$extrainfo $license"; 688 } 689 690 if ($licensetext =~ /SPDX-License-Identifier:\s+Zlib/i) { 691 $license = "zlib/libpng $license"; 692 } 693 694 if ($licensetext =~ /SPDX-License-Identifier:\s+BSD-3-Clause/i) { 695 $license = 'BSD (3 clause)'; 696 } 697 698 if ($licensetext =~ /SPDX-License-Identifier:\s+BSD-2-Clause/i) { 699 $license = 'BSD (2 clause)'; 700 } 701 702 if ($licensetext =~ /SPDX-License-Identifier:\s+BSD-1-Clause/i) { 703 $license = 'BSD'; 704 } 705 706 if ($licensetext =~ /SPDX-License-Identifier:\s+MIT/i) { 707 $license = 'MIT/X11 (BSD like)'; 708 } 709 710 if ($licensetext =~ /SPDX-License-Identifier:\s+ISC/i) { 711 $license = 'ISC'; 712 } 713 714 if ($licensetext =~ /(?:is|may be)\s(?:(?:distributed|used).*?terms|being\s+released).*?\b(L?GPL)\b/) { 715 my $v = $gplver || ' (unversioned/unknown version)'; 716 $license = "$1$v $license"; 717 } 718 719 if ($licensetext =~ /the rights to distribute and use this software as governed by the terms of the Lisp Lesser General Public License|\bLLGPL\b/ ) { 720 $license = "LLGPL $license"; 721 } 722 723 if ($licensetext =~ /This file is part of the .*Qt GUI Toolkit. This file may be distributed under the terms of the Q Public License as defined/) { 724 $license = "QPL (part of Qt) $license"; 725 } elsif ($licensetext =~ /may (be distributed|redistribute it) under the terms of the Q Public License/) { 726 $license = "QPL $license"; 727 } 728 729 if ($licensetext =~ /opensource\.org\/licenses\/mit-license\.php/) { 730 $license = "MIT/X11 (BSD like) $license"; 731 } elsif ($licensetext =~ /Permission is hereby granted, free of charge, to any person obtaining a copy of this software and(\/or)? associated documentation files \(the (Software|Materials)\), to deal in the (Software|Materials)/) { 732 $license = "MIT/X11 (BSD like) $license"; 733 } elsif ($licensetext =~ /Permission is hereby granted, without written agreement and without license or royalty fees, to use, copy, modify, and distribute this software and its documentation for any purpose/) { 734 $license = "MIT/X11 (BSD like) $license"; 735 } 736 737 if ($licensetext =~ /Permission to use, copy, modify, and(\/or)? distribute this software for any purpose with or without fee is hereby granted, provided.*copyright notice.*permission notice.*all copies/) { 738 $license = "ISC $license"; 739 } 740 741 if ($licensetext =~ /THIS SOFTWARE IS PROVIDED .*AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY/) { 742 if ($licensetext =~ /All advertising materials mentioning features or use of this software must display the following acknowledge?ment.*This product includes software developed by/i) { 743 $license = "BSD (4 clause) $license"; 744 } elsif ($licensetext =~ /(The name(?:\(s\))? .*? may not|Neither the (names? .*?|authors?) nor the names of( (its|their|other|any))? contributors may) be used to endorse or promote products derived from this software/i) { 745 $license = "BSD (3 clause) $license"; 746 } elsif ($licensetext =~ /Redistributions in binary form must reproduce the above copyright notice/i) { 747 $license = "BSD (2 clause) $license"; 748 } else { 749 $license = "BSD $license"; 750 } 751 } 752 753 if ($licensetext =~ /Mozilla Public License,? (?:(?:Version|v\.)\s+)?(\d+(?:\.\d+)?)/) { 754 $license = "MPL (v$1) $license"; 755 } 756 elsif ($licensetext =~ /Mozilla Public License,? \((?:Version|v\.) (\d+(?:\.\d+)?)\)/) { 757 $license = "MPL (v$1) $license"; 758 } 759 760 # match when either: 761 # - the text *begins* with "The Artistic license v2.0" which is (hopefully) the actual artistic license v2.0 text. 762 # - a license grant is found. i.e something like "this is free software, licensed under the artistic license v2.0" 763 if ($licensetext =~ /(?:^\s*|(?:This is free software, licensed|Released|be used|use and modify this (?:module|software)) under (?:the terms of )?)[Tt]he Artistic License ([v\d.]*\d)/) { 764 $license = "Artistic (v$1) $license"; 765 } 766 767 if ($licensetext =~ /is free software under the Artistic [Ll]icense/) { 768 $license = "Artistic $license"; 769 } 770 771 if ($licensetext =~ /This program is free software; you can redistribute it and\/or modify it under the same terms as Perl itself/) { 772 $license = "Perl $license"; 773 } 774 775 if ($licensetext =~ /under the Apache License, Version ([^ ]+)/) { 776 $license = "Apache (v$1) $license"; 777 } 778 779 if ($licensetext =~ /(THE BEER-WARE LICENSE)/i) { 780 $license = "Beerware $license"; 781 } 782 783 if ($licensetext =~ /distributed under the terms of the FreeType project/i) { 784 $license = "FreeType $license"; # aka FTL see https://www.freetype.org/license.html 785 } 786 787 if ($licensetext =~ /This source file is subject to version ([^ ]+) of the PHP license/) { 788 $license = "PHP (v$1) $license"; 789 } 790 791 if ($licensetext =~ /under the terms of the CeCILL /) { 792 $license = "CeCILL $license"; 793 } 794 795 if ($licensetext =~ /under the terms of the CeCILL-([^ ]+) /) { 796 $license = "CeCILL-$1 $license"; 797 } 798 799 if ($licensetext =~ /under the SGI Free Software License B/) { 800 $license = "SGI Free Software License B $license"; 801 } 802 803 if ($licensetext =~ /is in the public domain/i) { 804 $license = "Public domain $license"; 805 } 806 807 if ($licensetext =~ /terms of the Common Development and Distribution License(, Version ([^(]+))? \(the License\)/) { 808 $license = "CDDL " . ($1 ? "(v$2) " : '') . $license; 809 } 810 811 if ($licensetext =~ /Microsoft Permissive License \(Ms-PL\)/) { 812 $license = "Ms-PL $license"; 813 } 814 815 if ($licensetext =~ /Licensed under the Academic Free License version ([\d.]+)/) { 816 $license = $1 ? "AFL-$1" : "AFL"; 817 } 818 819 if ($licensetext =~ /This program and the accompanying materials are made available under the terms of the Eclipse Public License v?([\d.]+)/) { 820 $license = $1 ? "EPL-$1" : "EPL"; 821 } 822 823 # quotes were removed by clean_comments function 824 if ($licensetext =~ /Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license \(the Software\)/ or 825 $licensetext =~ /Boost Software License([ ,-]+Version ([^ ]+)?(\.))/i) { 826 $license = "BSL " . ($1 ? "(v$2) " : '') . $license; 827 } 828 829 if ($licensetext =~ /PYTHON SOFTWARE FOUNDATION LICENSE (VERSION ([^ ]+))/i) { 830 $license = "PSF " . ($1 ? "(v$2) " : '') . $license; 831 } 832 833 if ($licensetext =~ /The origin of this software must not be misrepresented.*Altered source versions must be plainly marked as such.*This notice may not be removed or altered from any source distribution/ or 834 $licensetext =~ /see copyright notice in zlib\.h/) { 835 $license = "zlib/libpng $license"; 836 } elsif ($licensetext =~ /This code is released under the libpng license/) { 837 $license = "libpng $license"; 838 } 839 840 if ($licensetext =~ /Do What The Fuck You Want To Public License, Version ([^, ]+)/i) { 841 $license = "WTFPL (v$1) $license"; 842 } 843 844 if ($licensetext =~ /Do what The Fuck You Want To Public License/i) { 845 $license = "WTFPL $license"; 846 } 847 848 if ($licensetext =~ /(License WTFPL|Under (the|a) WTFPL)/i) { 849 $license = "WTFPL $license"; 850 } 851 852 if ($licensetext =~ /SPDX-License-Identifier:\s+\(([a-zA-Z0-9-\.]+)\s+OR\s+([a-zA-Z0-9-\.]+)\)/i) { 853 my $license1 = $1; 854 my $license2 = $2; 855 $license = parselicense("SPDX-License-Identifier: $license1") . ";" . parselicense("SPDX-License-Identifier: $license2"); 856 } 857 858 $license = "UNKNOWN" if (!length($license)); 859 860 # Remove trailing spaces. 861 $license =~ s/\s+$//; 862 863 return $license; 864} 865 866sub fatal { 867 my ($pack,$file,$line); 868 ($pack,$file,$line) = caller(); 869 (my $msg = "$progname: fatal error at line $line:\n@_\n") =~ tr/\0//d; 870 $msg =~ s/\n\n$/\n/; 871 die $msg; 872} 873