1#!/usr/bin/env perl
2# (c) 2007, Joe Perches <joe@perches.com>
3#           created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
8# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
9#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use warnings;
14use strict;
15
# $P: program name as invoked; used as the prefix of every diagnostic below.
# $V: script version; printed by --version and in the usage text.
my $P = $0;
my $V = '0.26';
18
19use Getopt::Long qw(:config no_auto_abbrev);
20use Cwd;
21use File::Find;
22
# Default values for every command-line option.  GetOptions() further down
# overwrites these from @ARGV (which may have been extended from the
# .get_maintainer.conf file).

# Current working directory (with trailing slash) and path to the tree root.
my $cur_path = fastgetcwd() . '/';
my $lk_path = "./";

# Which kinds of email address to report and how to collect them.
my $email = 1;
my $email_usename = 1;
my $email_maintainer = 1;
my $email_reviewer = 1;
my $email_list = 1;
my $email_subscriber_list = 0;
my $email_git_penguin_chiefs = 0;
my $email_git = 0;
my $email_git_all_signature_types = 0;
my $email_git_blame = 0;
my $email_git_blame_signatures = 1;
my $email_git_fallback = 1;
my $email_git_min_signatures = 1;
my $email_git_max_maintainers = 5;
my $email_git_min_percent = 5;
my $email_git_since = "1-year-ago";
my $email_hg_since = "-365";
my $interactive = 0;
my $email_remove_duplicates = 1;
my $email_use_mailmap = 1;

# Output formatting.
my $output_multiline = 1;
my $output_separator = ", ";
my $output_roles = 0;
my $output_rolestats = 1;
my $output_section_maxlen = 50;

# Non-email fields to print, and miscellaneous switches.
my $scm = 0;
my $web = 0;
my $subsystem = 0;
my $status = 0;
my $letters = "";
my $keywords = 1;
my $sections = 0;
my $file_emails = 0;
my $from_filename = 0;
my $pattern_depth = 0;
# undef means --self-test was not given; any defined value (even "") enables it.
my $self_test = undef;
my $version = 0;
my $help = 0;
my $find_maintainer_files = 1;

my $vcs_used = 0;

# Process exit status handed to exit() at the end of the main program.
my $exit = 0;

# Reset at the start of get_maintainers().
# NOTE(review): presumably filled by the VCS helpers outside this view - confirm.
my %commit_author_hash;
my %commit_signer_hash;
71
# Project lead(s), each stored as a "Full Name:address" pair.
my @penguin_chief = ("Tom Rini:trini\@konsulko.com");

# Only the name part of each pair is needed here; entries without a ':' are
# skipped.  The names are joined into the "(A|B)" text shown by --help.
my @penguin_chief_names =
    map { m/^(.*):(.*)/ ? ($1) : () } @penguin_chief;
my $penguin_chiefs = "(" . join("|", @penguin_chief_names) . ")";

# Signature types of people who are either
# 	a) responsible for the code in question, or
# 	b) familiar enough with it to give relevant feedback
my @signature_tags = ("Signed-off-by:", "Reviewed-by:", "Acked-by:");

# Alternation of the tags above; get_maintainers() rebuilds it on every call.
my $signature_pattern = "(" . join("|", @signature_tags) . ")";

# rfc822 email address - preloaded methods go here.
my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
my $rfc822_char = '[\\000-\\377]';
98
# VCS command support: class-like functions and strings

# Command table of the VCS actually in use.
# NOTE(review): presumably assigned from one of the two tables below by code
# outside this view - confirm.
my %VCS_cmds;

# git: command templates and the regexes used to parse their output.
# The escaped \$file, \$commit, \$email_git_since, \$diff_start and
# \$diff_length are kept as literal "$name" text inside these strings.
# NOTE(review): presumably they are substituted just before a command is
# run, and "available" is a Perl expression evaluated later - confirm.
my %VCS_cmds_git = (
    "execute_cmd" => \&git_execute_cmd,
    "available" => '(which("git") ne "") && (-e ".git")',
    "find_signers_cmd" =>
	"git log --no-color --follow --since=\$email_git_since " .
	    '--numstat --no-merges ' .
	    '--format="GitCommit: %H%n' .
		      'GitAuthor: %an <%ae>%n' .
		      'GitDate: %aD%n' .
		      'GitSubject: %s%n' .
		      '%b%n"' .
	    " -- \$file",
    "find_commit_signers_cmd" =>
	"git log --no-color " .
	    '--numstat ' .
	    '--format="GitCommit: %H%n' .
		      'GitAuthor: %an <%ae>%n' .
		      'GitDate: %aD%n' .
		      'GitSubject: %s%n' .
		      '%b%n"' .
	    " -1 \$commit",
    "find_commit_author_cmd" =>
	"git log --no-color " .
	    '--numstat ' .
	    '--format="GitCommit: %H%n' .
		      'GitAuthor: %an <%ae>%n' .
		      'GitDate: %aD%n' .
		      'GitSubject: %s%n"' .
	    " -1 \$commit",
    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
    "blame_file_cmd" => "git blame -l \$file",
    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
    "blame_commit_pattern" => "^([0-9a-f]+) ",
    "author_pattern" => "^GitAuthor: (.*)",
    "subject_pattern" => "^GitSubject: (.*)",
    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
    "file_exists_cmd" => "git ls-files \$file",
    "list_files_cmd" => "git ls-files \$file",
);

# Mercurial: same keys as the git table.  Blaming a line range is not
# supported, so "blame_range_cmd" is left empty.
my %VCS_cmds_hg = (
    "execute_cmd" => \&hg_execute_cmd,
    "available" => '(which("hg") ne "") && (-d ".hg")',
    "find_signers_cmd" =>
	"hg log --date=\$email_hg_since " .
	    "--template='HgCommit: {node}\\n" .
	                "HgAuthor: {author}\\n" .
			"HgSubject: {desc}\\n'" .
	    " -- \$file",
    "find_commit_signers_cmd" =>
	"hg log " .
	    "--template='HgSubject: {desc}\\n'" .
	    " -r \$commit",
    "find_commit_author_cmd" =>
	"hg log " .
	    "--template='HgCommit: {node}\\n" .
		        "HgAuthor: {author}\\n" .
			"HgSubject: {desc|firstline}\\n'" .
	    " -r \$commit",
    "blame_range_cmd" => "",		# not supported
    "blame_file_cmd" => "hg blame -n \$file",
    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
    "blame_commit_pattern" => "^([ 0-9a-f]+):",
    "author_pattern" => "^HgAuthor: (.*)",
    "subject_pattern" => "^HgSubject: (.*)",
    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
    "file_exists_cmd" => "hg files \$file",
    "list_files_cmd" => "hg manifest -R \$file",
);
172
# Optional .get_maintainer.conf: every whitespace-separated word in it is
# prepended to @ARGV, so it is parsed as if typed first on the command line.
# Blank lines, lines starting with '#', and everything after a word that
# starts with '#' are ignored.
my $conf = which_conf(".get_maintainer.conf");
if (-f $conf) {
    my @conf_args;

    # Only read the file when it could actually be opened; an unreadable
    # file is reported and otherwise ignored.
    if (open(my $conffile, '<', "$conf")) {
        while (my $line = <$conffile>) {
            $line =~ s/\s*\n?$//g;
            $line =~ s/^\s*//g;
            $line =~ s/\s+/ /g;

            next if ($line =~ m/^\s*#/);
            next if ($line =~ m/^\s*$/);

            foreach my $word (split(" ", $line)) {
                last if ($word =~ m/^#/);    # trailing comment
                push(@conf_args, $word);
            }
        }
        close($conffile);
    } else {
        warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
    }

    unshift(@ARGV, @conf_args) if @conf_args;
}
198
# Optional .get_maintainer.ignore: one email address per line; addresses
# that pass rfc822_valid() are collected in @ignore_emails.
my @ignore_emails = ();
my $ignore_file = which_conf(".get_maintainer.ignore");
if (-f $ignore_file) {
    # Only read the file when it could actually be opened; an unreadable
    # file is reported and otherwise ignored.
    if (open(my $ignore, '<', "$ignore_file")) {
        while (my $line = <$ignore>) {
            # Drop the comment before trimming, so that blanks between the
            # address and a trailing '#' do not stay attached to the address
            # (it is later compared with 'eq').
            $line =~ s/#.*$//;
            $line =~ s/^\s+|\s+$//g;

            next if ($line eq "");
            if (rfc822_valid($line)) {
                push(@ignore_emails, $line);
            }
        }
        close($ignore);
    } else {
        warn "$P: Can't find a readable .get_maintainer.ignore file $!\n";
    }
}
219
# --self-test (with or without "=value") must be the only word on the
# command line; refuse to run when it is combined with anything else.
if (@ARGV > 1 && grep { /^-{1,2}self-test(?:=|$)/ } @ARGV) {
    die "$P: using --self-test does not allow any other option or argument\n";
}
227
# Parse the command line (plus any words prepended from the conf file) into
# the option variables declared above.  A trailing '!' makes an option
# negatable (--foo / --nofoo / --no-foo); '=i' and '=s' take an integer or
# string value; ':s' takes an optional string value.
my %option_spec = (
    'email!' => \$email,
    'git!' => \$email_git,
    'git-all-signature-types!' => \$email_git_all_signature_types,
    'git-blame!' => \$email_git_blame,
    'git-blame-signatures!' => \$email_git_blame_signatures,
    'git-fallback!' => \$email_git_fallback,
    'git-chief-penguins!' => \$email_git_penguin_chiefs,
    'git-min-signatures=i' => \$email_git_min_signatures,
    'git-max-maintainers=i' => \$email_git_max_maintainers,
    'git-min-percent=i' => \$email_git_min_percent,
    'git-since=s' => \$email_git_since,
    'hg-since=s' => \$email_hg_since,
    'i|interactive!' => \$interactive,
    'remove-duplicates!' => \$email_remove_duplicates,
    'mailmap!' => \$email_use_mailmap,
    'm!' => \$email_maintainer,
    'r!' => \$email_reviewer,
    'n!' => \$email_usename,
    'l!' => \$email_list,
    's!' => \$email_subscriber_list,
    'multiline!' => \$output_multiline,
    'roles!' => \$output_roles,
    'rolestats!' => \$output_rolestats,
    'separator=s' => \$output_separator,
    'subsystem!' => \$subsystem,
    'status!' => \$status,
    'scm!' => \$scm,
    'web!' => \$web,
    'letters=s' => \$letters,
    'pattern-depth=i' => \$pattern_depth,
    'k|keywords!' => \$keywords,
    'sections!' => \$sections,
    'fe|file-emails!' => \$file_emails,
    'f|file' => \$from_filename,
    # Negatable: the default is on, so without '!' there would be no way
    # to switch the recursive MAINTAINERS search off again.
    'find-maintainer-files!' => \$find_maintainer_files,
    'self-test:s' => \$self_test,
    'v|version' => \$version,
    'h|help|usage' => \$help,
);

if (!GetOptions(%option_spec)) {
    die "$P: invalid argument - use --help if necessary\n";
}
270
# --help and --version are answered immediately.
if ($help) {
    usage();
    exit 0;
}

if ($version) {
    print("${P} ${V}\n");
    exit 0;
}

# --self-test only inspects the MAINTAINERS files and then stops.
if (defined $self_test) {
    read_all_maintainer_files();
    self_test();
    exit 0;
}

# On a terminal with no arguments there is nothing to read a patch from.
die "$P: missing patchfile or -f file - use --help if necessary\n"
    if (-t STDIN && !@ARGV);

# Options that imply other options.
if ($output_separator ne ", ") {
    $output_multiline = 0;
}
if ($interactive) {
    $output_rolestats = 1;
}
if ($output_rolestats) {
    $output_roles = 1;
}

if ($sections || $letters ne "") {
    # Section listing mode switches every other kind of output off.
    $sections = 1;
    ($email, $email_list) = (0, 0);
    ($scm, $status, $subsystem, $web) = (0, 0, 0, 0);
    ($keywords, $interactive) = (0, 0);
} elsif (($email + $scm + $status + $subsystem + $web) == 0) {
    die "$P:  Missing required option: email, scm, status, subsystem or web\n";
}

# When emails are wanted, at least one source of addresses must be enabled.
if ($email) {
    my $enabled_sources =
        $email_maintainer + $email_reviewer +
        $email_list + $email_subscriber_list +
        $email_git + $email_git_penguin_chiefs + $email_git_blame;
    die "$P: Please select at least 1 email option\n"
        if ($enabled_sources == 0);
}

die "$P: The current directory does not appear to be "
    . "a U-Boot source tree.\n"
    unless top_of_kernel_tree($lk_path);
324
## Read MAINTAINERS for type/value pairs

# Flattened MAINTAINERS content: "T:value" entries plus the other
# non-blank, non-comment lines, in file order.
my @typevalue = ();
# K: (keyword) patterns, keyed by the @typevalue index of their K: entry.
my %keyword_hash;
# Paths of all MAINTAINERS files to read.
my @mfiles = ();
# One {file, linenr, line} record per line read; filled only for --self-test.
my @self_test_info = ();
331
# Read one MAINTAINERS file and append its content to @typevalue.
#
#   $file - path of the file to read; the script dies if it cannot be opened.
#
# "T: value" lines are stored as "T:value".  F: and X: glob patterns are
# converted to regex syntax first, and K: patterns are also recorded in
# %keyword_hash under the @typevalue index they are stored at.  Any other
# line that is neither blank nor a comment is stored unchanged.  When
# --self-test is active every line is also recorded in @self_test_info.
sub read_maintainer_file {
    my ($file) = @_;

    open(my $maint, '<', "$file")
        or die "$P: Can't open MAINTAINERS file '$file': $!\n";

    my $linenr = 1;
    while (my $line = <$maint>) {
        chomp $line;

        if ($line =~ m/^([A-Z]):\s*(.*)/) {
            my $type = $1;
            my $value = $2;

            ##Filename pattern matching
            if ($type eq "F" || $type eq "X") {
                # The directory test must see the path as written: after
                # '.' has been escaped to '\.', a directory with a dot in
                # its name would no longer be found on disk.
                my $is_dir = (-d $value);

                $value =~ s@\.@\\\.@g;       ##Convert . to \.
                $value =~ s/\*/\.\*/g;       ##Convert * to .*
                $value =~ s/\?/\./g;         ##Convert ? to .
                ##if pattern is a directory and it lacks a trailing slash, add one
                if ($is_dir) {
                    $value =~ s@([^/])$@$1/@;
                }
            } elsif ($type eq "K") {
                # Key is the index this entry is about to get in @typevalue.
                $keyword_hash{scalar(@typevalue)} = $value;
            }
            push(@typevalue, "$type:$value");
        } elsif (!($line =~ /^\s*$/ || $line =~ /^\s*\#/)) {
            push(@typevalue, $line);
        }

        if (defined $self_test) {
            push(@self_test_info, {file=>$file, linenr=>$linenr, line=>$line});
        }
        $linenr++;
    }
    close($maint);
}
369
# File::Find "wanted" callback: remember every regular file whose path ends
# in "/MAINTAINERS" by appending it to @mfiles.
sub find_is_maintainer_file {
    my $candidate = $_;
    return unless ($candidate =~ m@/MAINTAINERS$@);

    my $path = $File::Find::name;
    return unless (-f $path);

    push(@mfiles, $path);
}
377
# File::Find "preprocess" callback: return the given directory entries
# without any entry named ".git".
sub find_ignore_git {
    my @entries = @_;
    return grep { !m/^\.git$/ } @entries;
}
381
read_all_maintainer_files();

# Collect the paths of all MAINTAINERS files into @mfiles and read each one
# with read_maintainer_file().
#
# If ${lk_path}MAINTAINERS is a directory, every entry in it whose name does
# not start with '.' is added.  Then, with $find_maintainer_files set, the
# tree below $lk_path is searched (skipping ".git") for files named
# MAINTAINERS; otherwise only ${lk_path}MAINTAINERS itself is added, if it
# is a regular file.
sub read_all_maintainer_files {
    my $top_maintainers = "${lk_path}MAINTAINERS";

    if (-d $top_maintainers) {
        opendir(DIR, $top_maintainers) or die $!;
        my @entries = readdir(DIR);
        closedir(DIR);
        push(@mfiles,
             map { "$top_maintainers/$_" } grep { !/^\./ } @entries);
    }

    if (!$find_maintainer_files) {
        push(@mfiles, $top_maintainers) if -f $top_maintainers;
    } else {
        my %find_options = (
            wanted => \&find_is_maintainer_file,
            preprocess => \&find_ignore_git,
            no_chdir => 1,
        );
        find(\%find_options, "${lk_path}");
    }

    # A named loop variable is used on purpose: the reader assigns to the
    # global $_, which must not be aliased to an element of @mfiles.
    foreach my $mfile (@mfiles) {
        read_maintainer_file("$mfile");
    }
}
407
408#
409# Read mail address map
410#
411
# Parsed .mailmap: { names => { wrong => real name },
#                    addresses => { wrong => real address } }
my $mailmap;

read_mailmap();

# (Re)initialise $mailmap and fill it from ${lk_path}.mailmap.
#
# Nothing is read when --nomailmap was given or the file does not exist.
# If the file exists but cannot be opened, a warning is printed and the
# map stays empty.
sub read_mailmap {
    $mailmap = {
        names => {},
        addresses => {}
    };

    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));

    my $mailmap_file;
    if (!open($mailmap_file, '<', "${lk_path}.mailmap")) {
        warn "$P: Can't open .mailmap: $!\n";
        return;    # do not read from a handle that was never opened
    }

    while (my $entry = <$mailmap_file>) {
        $entry =~ s/#.*$//; #strip comments
        $entry =~ s/^\s+|\s+$//g; #trim

        next if ($entry =~ /^\s*$/); #skip empty lines
        #entries have one of the following formats:
        # name1 <mail1>
        # <mail1> <mail2>
        # name1 <mail1> <mail2>
        # name1 <mail1> name2 <mail2>
        # (see man git-shortlog)

        if ($entry =~ /^([^<]+)<([^>]+)>$/) {
            # name1 <mail1>: preferred name for an address
            my $real_name = $1;
            my $address = $2;

            $real_name =~ s/\s+$//;
            ($real_name, $address) = parse_email("$real_name <$address>");
            $mailmap->{names}->{$address} = $real_name;

        } elsif ($entry =~ /^<([^>]+)>\s*<([^>]+)>$/) {
            # <mail1> <mail2>: preferred address for another address
            my $real_address = $1;
            my $wrong_address = $2;

            $mailmap->{addresses}->{$wrong_address} = $real_address;

        } elsif ($entry =~ /^(.+)<([^>]+)>\s*<([^>]+)>$/) {
            # name1 <mail1> <mail2>: preferred name and address for an address
            my $real_name = $1;
            my $real_address = $2;
            my $wrong_address = $3;

            $real_name =~ s/\s+$//;
            ($real_name, $real_address) =
                parse_email("$real_name <$real_address>");
            $mailmap->{names}->{$wrong_address} = $real_name;
            $mailmap->{addresses}->{$wrong_address} = $real_address;

        } elsif ($entry =~ /^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
            # name1 <mail1> name2 <mail2>: keyed by the full wrong identity
            my $real_name = $1;
            my $real_address = $2;
            my $wrong_name = $3;
            my $wrong_address = $4;

            $real_name =~ s/\s+$//;
            ($real_name, $real_address) =
                parse_email("$real_name <$real_address>");

            $wrong_name =~ s/\s+$//;
            ($wrong_name, $wrong_address) =
                parse_email("$wrong_name <$wrong_address>");

            my $wrong_email = format_email($wrong_name, $wrong_address, 1);
            $mailmap->{names}->{$wrong_email} = $real_name;
            $mailmap->{addresses}->{$wrong_email} = $real_address;
        }
    }
    close($mailmap_file);
}
485
## use the filenames on the command line or find the filenames in the patchfiles

# Files to look up, relative to the tree root.
my @files = ();
# "file:start:length" entries taken from hunk headers (only with --git-blame).
my @range = ();
# @typevalue indexes of K: entries whose pattern matched.
my @keyword_tvi = ();
# Addresses found inside the files themselves (only with --file-emails).
my @file_emails = ();

# With no arguments the patch is read from standard input.
if (!@ARGV) {
    push(@ARGV, "&STDIN");
}

foreach my $file (@ARGV) {
    if ($file ne "&STDIN") {
	##if $file is a directory and it lacks a trailing slash, add one
	if ((-d $file)) {
	    $file =~ s@([^/])$@$1/@;
	} elsif (!(-f $file)) {
	    die "$P: file '${file}' not found\n";
	}
    }
    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
	# The argument names a source file (or directory), not a patch.
	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
	push(@files, $file);
	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
	    open(my $f, '<', $file)
		or die "$P: Can't open $file: $!\n";
	    my $text = do { local($/) ; <$f> };	# slurp the whole file
	    close($f);
	    if ($keywords) {
		foreach my $line (keys %keyword_hash) {
		    if ($text =~ m/$keyword_hash{$line}/x) {
			push(@keyword_tvi, $line);
		    }
		}
	    }
	    if ($file_emails) {
		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
		push(@file_emails, clean_file_emails(@poss_addr));
	    }
	}
    } else {
	# The argument is a patch: collect the file names it touches.
	my $file_cnt = @files;
	my $lastfile;

	# Three-argument open so that no character of the file name can be
	# taken as part of the open mode; standard input is duplicated
	# explicitly instead of through the "&STDIN" pseudo file name.
	my $patch;
	if ($file eq "&STDIN") {
	    open($patch, '<&', \*STDIN)
		or die "$P: Can't open $file: $!\n";
	} else {
	    open($patch, '<', $file)
		or die "$P: Can't open $file: $!\n";
	}

	# We can check arbitrary information before the patch
	# like the commit message, mail headers, etc...
	# This allows us to match arbitrary keywords against any part
	# of a git format-patch generated file (subject tags, etc...)

	my $patch_prefix = "";			#Parsing the intro

	while (<$patch>) {
	    my $patch_line = $_;
	    if (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
		my $filename = $1;
		$filename =~ s@^[^/]*/@@;	# drop the leading a/ or b/ component
		$filename =~ s@\n@@;
		$lastfile = $filename;
		push(@files, $filename);
		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
		if ($email_git_blame) {
		    push(@range, "$lastfile:$1:$2");
		}
	    } elsif ($keywords) {
		foreach my $line (keys %keyword_hash) {
		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
			push(@keyword_tvi, $line);
		    }
		}
	    }
	}
	close($patch);

	# No new file name was found, so this was probably not a patch.
	if ($file_cnt == @files) {
	    warn "$P: file '${file}' doesn't appear to be a patch.  "
		. "Add -f to options?\n";
	}
	@files = sort_and_uniq(@files);
    }
}
571
@file_emails = uniq(@file_emails);

# Result state; get_maintainers() resets all of it before refilling.
my (%email_hash_name, %email_hash_address);
my @email_to = ();
my %hash_list_to;
my @list_to = ();
my (@scm, @web, @subsystem, @status) = ();
my %deduplicate_name_hash = ();
my %deduplicate_address_hash = ();

my @maintainers = get_maintainers();

if (@maintainers) {
    @maintainers = merge_email(@maintainers);
    output(@maintainers);
}

# Print each remaining field that was asked for, duplicates removed, in the
# fixed order scm, status, subsystem, web.  The flags are taken by reference
# so each one is read only when its turn comes.
foreach my $field ([\$scm, \@scm], [\$status, \@status],
                   [\$subsystem, \@subsystem], [\$web, \@web]) {
    my ($wanted, $entries) = @{$field};
    next unless ${$wanted};
    @{$entries} = uniq(@{$entries});
    output(@{$entries});
}

exit($exit);
614
# Run the --self-test checks over the lines recorded in @self_test_info and
# print one "file:line: warning: ..." message per problem found.
#
# $self_test selects the checks: an empty string runs all of them, otherwise
# a check runs when its name (sections, patterns, links, scm) appears in the
# value as a whole word.
#
# NOTE(review): the link and scm checks interpolate MAINTAINERS content
# ($value, $url, $branch) into shell command lines run through backticks;
# only $url in the git case is quoted.  Only run this on trusted input.
sub self_test {
    my @lsfiles = ();
    my @good_links = ();
    my @bad_links = ();
    my @section_headers = ();
    my $index = 0;

    @lsfiles = vcs_list_files($lk_path);

    for my $x (@self_test_info) {
	# After this increment $index is the array position of the entry
	# that follows $x.
	$index++;

	## Section header duplication and missing section content
	# A header is a line starting with a non-blank character whose second
	# character is not ':', directly followed by a "T: value" line.
	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
	    $x->{line} =~ /^\S[^:]/ &&
	    defined $self_test_info[$index] &&
	    $self_test_info[$index]->{line} =~ /^([A-Z]):\s*\S/) {
	    my $has_S = 0;
	    my $has_F = 0;
	    my $has_ML = 0;
	    my $status = "";
	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
	    } else {
		push(@section_headers, $x->{line});
	    }
	    # Walk the typed lines of this section and note which kinds exist.
	    my $nextline = $index;
	    while (defined $self_test_info[$nextline] &&
		   $self_test_info[$nextline]->{line} =~ /^([A-Z]):\s*(\S.*)/) {
		my $type = $1;
		my $value = $2;
		if ($type eq "S") {
		    $has_S = 1;
		    $status = $value;
		} elsif ($type eq "F" || $type eq "N") {
		    $has_F = 1;
		} elsif ($type eq "M" || $type eq "R" || $type eq "L") {
		    $has_ML = 1;
		}
		$nextline++;
	    }
	    # Orphan/obsolete sections are allowed to have no contact.
	    if (!$has_ML && $status !~ /orphan|obsolete/i) {
		print("$x->{file}:$x->{linenr}: warning: section without email address\t$x->{line}\n");
	    }
	    if (!$has_S) {
		print("$x->{file}:$x->{linenr}: warning: section without status \t$x->{line}\n");
	    }
	    if (!$has_F) {
		print("$x->{file}:$x->{linenr}: warning: section without file pattern\t$x->{line}\n");
	    }
	}

	# The remaining checks only apply to "T: value" lines.
	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);

	my $type = $1;
	my $value = $2;

	## Filename pattern matching
	if (($type eq "F" || $type eq "X") &&
	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
	    $value =~ s/\?/\./g;         ##Convert ? to .
	    ##if pattern is a directory and it lacks a trailing slash, add one
	    if ((-d $value)) {
		$value =~ s@([^/])$@$1/@;
	    }
	    if (!grep(m@^$value@, @lsfiles)) {
		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
	    }

	## Link reachability
	} elsif (($type eq "W" || $type eq "Q" || $type eq "B") &&
		 $value =~ /^https?:/ &&
		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
	    # Results are cached so that every URL is probed only once.
	    next if (grep(m@^\Q$value\E$@, @good_links));
	    my $isbad = 0;
	    if (grep(m@^\Q$value\E$@, @bad_links)) {
	        $isbad = 1;
	    } else {
		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
		if ($? == 0) {
		    push(@good_links, $value);
		} else {
		    push(@bad_links, $value);
		    $isbad = 1;
		}
	    }
	    if ($isbad) {
	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
	    }

	## SCM reachability
	} elsif ($type eq "T" &&
		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
	    next if (grep(m@^\Q$value\E$@, @good_links));
	    my $isbad = 0;
	    if (grep(m@^\Q$value\E$@, @bad_links)) {
	        $isbad = 1;
            } elsif ($value !~ /^(?:git|quilt|hg)\s+\S/) {
		print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
	    } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
		# "git <url> [branch]": ask the remote for its heads.
		my $url = $1;
		my $branch = "";
		$branch = $3 if $3;
		my $output = `git ls-remote --exit-code -h "$url" $branch > /dev/null 2>&1`;
		if ($? == 0) {
		    push(@good_links, $value);
		} else {
		    push(@bad_links, $value);
		    $isbad = 1;
		}
	    } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
		my $url = $1;
		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
		if ($? == 0) {
		    push(@good_links, $value);
		} else {
		    push(@bad_links, $value);
		    $isbad = 1;
		}
	    }
	    if ($isbad) {
		print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
	    }
	}
    }
}
743
# Return 1 if $address is listed (exact string match) in @ignore_emails,
# otherwise 0.
sub ignore_email_address {
    my ($address) = @_;

    my $listed = grep { $_ eq $address } @ignore_emails;

    return $listed ? 1 : 0;
}
753
# Return 1 if any S: (status) entry in @typevalue[$start .. $end-1] mentions
# "maintain" or "support" (case-insensitive), otherwise 0.
sub range_is_maintained {
    my ($start, $end) = @_;

    foreach my $idx ($start .. $end - 1) {
	next unless ($typevalue[$idx] =~ m/^S:\s*(.*)/);
	my $status_text = $1;
	return 1 if ($status_text =~ /(maintain|support)/i);
    }

    return 0;
}
771
# Return 1 if @typevalue[$start .. $end-1] contains at least one M:
# (maintainer) entry, otherwise 0.
sub range_has_maintainer {
    my ($start, $end) = @_;

    foreach my $idx ($start .. $end - 1) {
	return 1 if ($typevalue[$idx] =~ m/^M:/);
    }

    return 0;
}
787
# Work out who is responsible for the files in @files.
#
# Resets all result state, matches every file against the F:/X:/N: patterns
# of each MAINTAINERS section, adds keyword matches, optionally consults VCS
# history, and handles the "chief penguin" and in-file addresses.  With
# --sections the matching sections are printed instead.
#
# Returns the list of [address, role] pairs to output (@email_to and/or
# @list_to, depending on the options), after the interactive menu if
# --interactive is set.
sub get_maintainers {
    %email_hash_name = ();
    %email_hash_address = ();
    %commit_author_hash = ();
    %commit_signer_hash = ();
    @email_to = ();
    %hash_list_to = ();
    @list_to = ();
    @scm = ();
    @web = ();
    @subsystem = ();
    @status = ();
    %deduplicate_name_hash = ();
    %deduplicate_address_hash = ();
    if ($email_git_all_signature_types) {
	$signature_pattern = "(.+?)[Bb][Yy]:";
    } else {
	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
    }

    # Find responsible parties

    # Files covered by an exact pattern of a maintained section that has an
    # M: entry; the git fallback is skipped for these.
    my %exact_pattern_match_hash = ();

    foreach my $file (@files) {

	# Section index => pattern depth of the match, used for ordering.
	my %hash;
	my $tvi = find_first_section();
	while ($tvi < @typevalue) {
	    my $start = find_starting_index($tvi);
	    my $end = find_ending_index($tvi);
	    my $exclude = 0;
	    my $i;

	    #Do not match excluded file patterns

	    for ($i = $start; $i < $end; $i++) {
		my $line = $typevalue[$i];
		if ($line =~ m/^([A-Z]):\s*(.*)/) {
		    my $type = $1;
		    my $value = $2;
		    if ($type eq 'X') {
			if (file_match_pattern($file, $value)) {
			    $exclude = 1;
			    last;
			}
		    }
		}
	    }

	    if (!$exclude) {
		for ($i = $start; $i < $end; $i++) {
		    my $line = $typevalue[$i];
		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
			my $type = $1;
			my $value = $2;
			if ($type eq 'F') {
			    if (file_match_pattern($file, $value)) {
				# Depth = number of '/' in the pattern / file.
				my $value_pd = ($value =~ tr@/@@);
				my $file_pd = ($file  =~ tr@/@@);
				$value_pd++ if (substr($value,-1,1) ne "/");
				$value_pd = -1 if ($value =~ /^\.\*/);
				if ($value_pd >= $file_pd &&
				    range_is_maintained($start, $end) &&
				    range_has_maintainer($start, $end)) {
				    $exact_pattern_match_hash{$file} = 1;
				}
				if ($pattern_depth == 0 ||
				    (($file_pd - $value_pd) < $pattern_depth)) {
				    $hash{$tvi} = $value_pd;
				}
			    }
			} elsif ($type eq 'N') {
			    if ($file =~ m/$value/x) {
				$hash{$tvi} = 0;
			    }
			}
		    }
		}
	    }
	    $tvi = $end + 1;
	}

	# Deepest (most specific) pattern matches first.
	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
	    add_categories($line);
	    if ($sections) {
		my $i;
		my $start = find_starting_index($line);
		my $end = find_ending_index($line);
		for ($i = $start; $i < $end; $i++) {
		    my $line = $typevalue[$i];
		    if ($line =~ /^[FX]:/) {		##Restore file patterns
			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
			$line =~ s/\\\./\./g;       	##Convert \. to .
			$line =~ s/\.\*/\*/g;       	##Convert .* to *
		    }
		    # $count is 0 for section header lines, which are always
		    # printed; $1 is only consulted when the s/// matched.
		    my $count = $line =~ s/^([A-Z]):/$1:\t/g;
		    if ($letters eq "" || (!$count || $letters =~ /$1/i)) {
			print("$line\n");
		    }
		}
		print("\n");
	    }
	}
    }

    if ($keywords) {
	@keyword_tvi = sort_and_uniq(@keyword_tvi);
	foreach my $line (@keyword_tvi) {
	    add_categories($line);
	}
    }

    foreach my $email (@email_to, @list_to) {
	$email->[0] = deduplicate_email($email->[0]);
    }

    foreach my $file (@files) {
	if ($email &&
	    ($email_git || ($email_git_fallback &&
			    !$exact_pattern_match_hash{$file}))) {
	    vcs_file_signoffs($file);
	}
	if ($email && $email_git_blame) {
	    vcs_file_blame($file);
	}
    }

    if ($email) {
	foreach my $chief (@penguin_chief) {
	    if ($chief =~ m/^(.*):(.*)/) {
		my $email_address;

		$email_address = format_email($1, $2, $email_usename);
		if ($email_git_penguin_chiefs) {
		    push(@email_to, [$email_address, 'chief penguin']);
		} else {
		    # Drop the chief from the list.  The address is quoted so
		    # that '.', '+', '(' etc. in it are matched literally
		    # instead of acting as regex metacharacters.
		    @email_to = grep($_->[0] !~ /\Q${email_address}\E/, @email_to);
		}
	    }
	}

	foreach my $email (@file_emails) {
	    my ($name, $address) = parse_email($email);

	    my $tmp_email = format_email($name, $address, $email_usename);
	    push_email_address($tmp_email, '');
	    add_role($tmp_email, 'in file');
	}
    }

    my @to = ();
    if ($email || $email_list) {
	if ($email) {
	    @to = (@to, @email_to);
	}
	if ($email_list) {
	    @to = (@to, @list_to);
	}
    }

    if ($interactive) {
	@to = interactive_get_maintainers(\@to);
    }

    return @to;
}
956
# Return 1 if $file matches the (regex-converted) MAINTAINERS pattern
# $pattern, otherwise 0.
#
# The pattern is always anchored at the start of $file.  A pattern ending in
# "/" names a directory and matches everything below it; any other pattern
# only matches files at the same directory depth, i.e. with the same number
# of "/" characters as the pattern.
sub file_match_pattern {
    my ($file, $pattern) = @_;

    return 0 unless ($file =~ m@^$pattern@);
    return 1 if (substr($pattern, -1) eq "/");

    my $file_slashes = ($file =~ tr@/@@);
    my $pattern_slashes = ($pattern =~ tr@/@@);

    return ($file_slashes == $pattern_slashes) ? 1 : 0;
}
974
# Print the help text.  The text is a heredoc that interpolates the program
# name, the version and the current values of several option variables, so
# the "default:" figures shown reflect any earlier option parsing.
sub usage {
    print <<EOT;
usage: $P [options] patchfile
       $P [options] -f file|directory
version: $V

MAINTAINER field selection options:
  --email => print email address(es) if any
    --git => include recent git \*-by: signers
    --git-all-signature-types => include signers regardless of signature type
        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
    --git-chief-penguins => include ${penguin_chiefs}
    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
    --git-blame => use git blame to find modified commits for patch or file
    --git-blame-signatures => when used with --git-blame, also include all commit signers
    --git-since => git history to use (default: $email_git_since)
    --hg-since => hg history to use (default: $email_hg_since)
    --interactive => display a menu (mostly useful if used with the --git option)
    --m => include maintainer(s) if any
    --r => include reviewer(s) if any
    --n => include name 'Full Name <addr\@domain.tld>'
    --l => include list(s) if any
    --s => include subscriber only list(s) if any
    --remove-duplicates => minimize duplicate email names/addresses
    --roles => show roles (status:subsystem, git-signer, list, etc...)
    --rolestats => show roles and statistics (commits/total_commits, %)
    --file-emails => add email addresses found in -f file (default: 0 (off))
  --scm => print SCM tree(s) if any
  --status => print status if any
  --subsystem => print subsystem name if any
  --web => print website(s) if any

Output type options:
  --separator [, ] => separator for multiple entries on 1 line
    using --separator also sets --nomultiline if --separator is not [, ]
  --multiline => print 1 entry per line

Other options:
  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
  --keywords => scan patch for keywords (default: $keywords)
  --sections => print all of the subsystem sections with pattern matches
  --letters => print all matching 'letter' types from all matching sections
  --mailmap => use .mailmap file (default: $email_use_mailmap)
  --self-test => show potential issues with MAINTAINERS file content
  --version => show version
  --help => show this help information

Default options:
  [--email --nogit --git-fallback --m --r --n --l --multiline --pattern-depth=0
   --remove-duplicates --rolestats]

Notes:
  Using "-f directory" may give unexpected results:
      Used with "--git", git signators for _all_ files in and below
          directory are examined as git recurses directories.
          Any specified X: (exclude) pattern matches are _not_ ignored.
      Used with "--nogit", directory is used as a pattern match,
          no individual file within the directory or subdirectory
          is matched.
      Used with "--git-blame", does not iterate all files in directory
  Using "--git-blame" is slow and may add old committers and authors
      that are no longer active maintainers to the output.
  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
      other automated tools that expect only ["name"] <email address>
      may not work because of additional output after <email address>.
  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
      not the percentage of the entire file authored.  # of commits is
      not a good measure of amount of code authored.  1 major commit may
      contain a thousand lines, 5 trivial commits may modify a single line.
  If git is not installed, but mercurial (hg) is installed and an .hg
      repository exists, the following options apply to mercurial:
          --git,
          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
          --git-blame
      Use --hg-since not --git-since to control date selection
  File ".get_maintainer.conf", if it exists in the linux kernel source root
      directory, can change whatever get_maintainer defaults are desired.
      Entries in this file can be any command line argument.
      This file is prepended to any additional command line arguments.
      Multiple lines and # comments are allowed.
  Most options have both positive and negative forms.
      The negative forms for --<foo> are --no<foo> and --no-<foo>.

EOT
}
1063
# Check whether $lk_path looks like the top of the source tree.
#
# A trailing "/" is appended to a non-empty path when it is missing, then
# a fixed set of files and directories is looked up below that path.
# Returns 1 when every one of them is present, 0 otherwise.
sub top_of_kernel_tree {
    my ($lk_path) = @_;

    $lk_path .= "/" if ($lk_path ne "" && $lk_path !~ m{/\z});

    # Entries that must be plain files.
    foreach my $file (qw(Kbuild Makefile README)) {
	return 0 if (!(-f "${lk_path}${file}"));
    }

    # MAINTAINERS only has to exist (it is tested with -e, not -f).
    return 0 if (!(-e "${lk_path}MAINTAINERS"));

    # Entries that must be directories.
    my @dirs = qw(arch board common doc drivers dts fs lib include net
		  post scripts test tools);
    foreach my $dir (@dirs) {
	return 0 if (!(-d "${lk_path}${dir}"));
    }

    return 1;
}
1092
# Split a formatted email string into its name and address parts.
#
# Three input shapes are recognised, tried in this order:
#   name <address>    <address>    address
# Surrounding whitespace is trimmed from both parts and one enclosing
# pair of double quotes is stripped from the name.  A name containing
# anything other than word characters, spaces or "-" is returned
# re-quoted, with unescaped double quotes escaped.
# Returns the list ($name, $address); either may be "".
sub parse_email {
    my ($formatted_email) = @_;

    my ($name, $address) = ("", "");

    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
	($name, $address) = ($1, $2);
    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
	$address = $1;
    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
	$address = $1;
    }

    $address =~ s/^\s+|\s+$//g;
    $name =~ s/^\s+|\s+$//g;
    $name =~ s/^\"|\"$//g;

    my $must_quote = ($name =~ /[^\w \-]/i);	##has "must quote" chars
    if ($must_quote) {
	$name =~ s/(?<!\\)"/\\"/g;		##escape quotes
	$name = "\"$name\"";
    }

    return ($name, $address);
}
1119
# Build the output form of an email entry from a name and an address.
#
# Both parts are trimmed, one enclosing pair of double quotes is removed
# from the name, and a name with characters other than word characters,
# spaces or "-" is quoted again (unescaped double quotes are escaped).
# With a true $usename the result is "name <address>", or just the
# address when the name is empty; otherwise it is the address alone.
sub format_email {
    my ($name, $address, $usename) = @_;

    $address =~ s/^\s+|\s+$//g;
    $name =~ s/^\s+|\s+$//g;
    $name =~ s/^\"|\"$//g;

    my $must_quote = ($name =~ /[^\w \-]/i);	##has "must quote" chars
    if ($must_quote) {
	$name =~ s/(?<!\\)"/\\"/g;		##escape quotes
	$name = "\"$name\"";
    }

    return $address if (!$usename);

    return ("$name" eq "") ? "$address" : "$name <$address>";
}
1146
# Return the index of the first @typevalue entry of the form
# "<capital letter>:..."; when there is none, the number of entries.
sub find_first_section {
    my $index;

    for ($index = 0; $index < @typevalue; $index++) {
	last if ($typevalue[$index] =~ m/^([A-Z]):\s*(.*)/);
    }

    return $index;
}
1160
# Walk backwards from $index while the @typevalue entries look like
# "<capital letter>:..." and return the index of the first entry that
# does not (or 0 when the beginning of the array is reached).
sub find_starting_index {
    my ($index) = @_;

    for (; $index > 0; $index--) {
	last if ($typevalue[$index] !~ m/^([A-Z]):\s*(.*)/);
    }

    return $index;
}
1174
# Walk forwards from $index while the @typevalue entries look like
# "<capital letter>:..." and return the index of the first entry that
# does not (or the number of entries when the end is reached).
sub find_ending_index {
    my ($index) = @_;

    for (; $index < @typevalue; $index++) {
	last if ($typevalue[$index] !~ m/^([A-Z]):\s*(.*)/);
    }

    return $index;
}
1188
# Return the @typevalue entry found by find_starting_index($index),
# i.e. the line that heads the section containing $index.
#
# When $output_section_maxlen is non-zero and the line is longer than
# that, it is cut to ($output_section_maxlen - 3) characters, trailing
# whitespace is removed and "..." is appended.
sub get_subsystem_name {
    my ($index) = @_;

    my $subsystem = $typevalue[find_starting_index($index)];

    return $subsystem
	if (!$output_section_maxlen ||
	    length($subsystem) <= $output_section_maxlen);

    my $shortened = substr($subsystem, 0, $output_section_maxlen - 3);
    $shortened =~ s/\s*$//;

    return $shortened . "...";
}
1202
# Build the "role:subsystem" string for the section containing $index.
#
# The role is taken from the last "S:" line of the section, lower-cased
# and mapped to a noun where a mapping is known; it stays "unknown"
# when the section has no "S:" line.
sub get_maintainer_role {
    my ($index) = @_;

    # Status text (lower case) => role shown in the output.
    my %role_for = (
	"supported"                 => "supporter",
	"maintained"                => "maintainer",
	"odd fixes"                 => "odd fixer",
	"orphan"                    => "orphan minder",
	"obsolete"                  => "obsolete minder",
	"buried alive in reporters" => "chief penguin",
    );

    my $start = find_starting_index($index);
    my $end = find_ending_index($index);
    my $subsystem = get_subsystem_name($index);

    my $role = "unknown";
    foreach my $i ($start + 1 .. $end - 1) {
	my ($ptype, $pvalue) = $typevalue[$i] =~ m/^([A-Z]):\s*(.*)/
	    or next;
	$role = $pvalue if ($ptype eq "S");
    }

    $role = lc($role);
    $role = $role_for{$role} if (exists($role_for{$role}));

    return $role . ":" . $subsystem;
}
1241
# Return the subsystem name used to label a mailing list entry, or ""
# when the section is "THE REST".
sub get_list_role {
    my ($index) = @_;

    my $subsystem = get_subsystem_name($index);

    return ($subsystem eq "THE REST") ? "" : $subsystem;
}
1253
# Collect the output data of the section containing $index.
#
# The section heading is pushed onto @subsystem, then every
# "<letter>: value" line of the section is handled by type:
#   L  mailing list -> @list_to (once per address, subject to
#      $email_list / $email_subscriber_list)
#   M  maintainer   -> push_email_addresses (if $email_maintainer)
#   R  reviewer     -> push_email_addresses (if $email_reviewer)
#   T  -> @scm,  W -> @web,  S -> @status
sub add_categories {
    my ($index) = @_;

    my $start = find_starting_index($index);
    my $end = find_ending_index($index);

    # For an M:/R: value without a name, borrow the name from a "P:" line
    # directly before it and return the reformatted value; otherwise
    # return the value unchanged.
    my $with_name = sub {
	my ($pos, $value) = @_;

	my ($name, $address) = parse_email($value);
	return $value if ($name ne "" || $pos <= 0);

	if ($typevalue[$pos - 1] =~ m/^([A-Z]):\s*(.*)/ && $1 eq "P") {
	    my $prev_name = $2;
	    return format_email($prev_name, $address, $email_usename);
	}
	return $value;
    };

    push(@subsystem, $typevalue[$start]);

    foreach my $i ($start + 1 .. $end - 1) {
	my ($ptype, $pvalue) = $typevalue[$i] =~ m/^([A-Z]):\s*(.*)/
	    or next;

	if ($ptype eq "L") {
	    my ($list_address, $list_additional) = ($pvalue, "");
	    my $list_role = get_list_role($i);
	    $list_role = ":" . $list_role if ($list_role ne "");

	    # Anything after the address is a free-form annotation.
	    if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
		($list_address, $list_additional) = ($1, $2);
	    }

	    my $kind;
	    if ($list_additional =~ m/subscribers-only/) {
		next if (!$email_subscriber_list);
		$kind = "subscriber list";
	    } else {
		next if (!$email_list);
		$kind = ($list_additional =~ m/moderated/)
		    ? "moderated list" : "open list";
	    }

	    # Each list address is reported only once.
	    next if ($hash_list_to{lc($list_address)});
	    $hash_list_to{lc($list_address)} = 1;
	    push(@list_to, [$list_address, "${kind}${list_role}"]);
	} elsif ($ptype eq "M") {
	    $pvalue = $with_name->($i, $pvalue);
	    if ($email_maintainer) {
		push_email_addresses($pvalue, get_maintainer_role($i));
	    }
	} elsif ($ptype eq "R") {
	    $pvalue = $with_name->($i, $pvalue);
	    if ($email_reviewer) {
		my $subsystem = get_subsystem_name($i);
		push_email_addresses($pvalue, "reviewer:$subsystem");
	    }
	} elsif ($ptype eq "T") {
	    push(@scm, $pvalue);
	} elsif ($ptype eq "W") {
	    push(@web, $pvalue);
	} elsif ($ptype eq "S") {
	    push(@status, $pvalue);
	}
    }
}
1346
# Tell whether an entry should be treated as already recorded.
#
# Returns 1 when both parts are empty, when the (non-empty) name is a
# key of %email_hash_name, or when the (non-empty) address is a key of
# %email_hash_address; lookups are done on the lower-cased value.
# Returns 0 otherwise.
sub email_inuse {
    my ($name, $address) = @_;

    if ($name eq "") {
	return 1 if ($address eq "");
    } elsif (exists($email_hash_name{lc($name)})) {
	return 1;
    }

    if ($address ne "" && exists($email_hash_address{lc($address)})) {
	return 1;
    }

    return 0;
}
1356
# Append one formatted address, paired with $role, to @email_to.
#
# Returns 0 when no address can be parsed out of $line, 1 otherwise.
# With $email_remove_duplicates set, an entry that email_inuse() reports
# as known is not appended again, and newly appended entries are
# recorded in %email_hash_name / %email_hash_address.
sub push_email_address {
    my ($line, $role) = @_;

    my ($name, $address) = parse_email($line);
    return 0 if ($address eq "");

    my $duplicate = $email_remove_duplicates && email_inuse($name, $address);
    if (!$duplicate) {
	push(@email_to, [format_email($name, $address, $email_usename), $role]);
	if ($email_remove_duplicates) {
	    $email_hash_name{lc($name)}++ if ($name ne "");
	    $email_hash_address{lc($address)}++;
	}
    }

    return 1;
}
1376
# Add one address, or a list of addresses, with the given role.
#
# A value accepted by rfc822_valid() is added as is.  Otherwise, when
# rfc822_validlist() returns a non-empty list, each of its entries after
# the first is added.  As a last resort the raw value is handed to
# push_email_address(), and a warning is printed if that fails too.
sub push_email_addresses {
    my ($address, $role) = @_;

    my @address_list = ();

    if (rfc822_valid($address)) {
	push_email_address($address, $role);
    } elsif (@address_list = rfc822_validlist($address)) {
	# The leading element is not an address (apparently a count of
	# the entries that follow); drop it.
	shift(@address_list);
	foreach my $entry (@address_list) {
	    last if (!$entry);
	    push_email_address($entry, $role);
	}
    } else {
	warn("Invalid MAINTAINERS address: '" . $address . "'\n")
	    if (!push_email_address($address, $role));
    }
}
1395
# Attach $role to every entry of @email_to that belongs to $line.
#
# With $email_remove_duplicates set, an entry belongs to $line when its
# parsed name or its parsed address equals the one parsed from $line;
# otherwise the complete formatted email has to be equal.
# The role is appended, comma separated, to the entry's role string
# unless that string already contains it.
sub add_role {
    my ($line, $role) = @_;

    my ($name, $address) = parse_email($line);
    my $email = format_email($name, $address, $email_usename);

    foreach my $entry (@email_to) {
	my $same_person;
	if ($email_remove_duplicates) {
	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
	    $same_person = ($name eq $entry_name ||
			    $address eq $entry_address);
	} else {
	    $same_person = ($email eq $entry->[0]);
	}
	next if (!$same_person);

	# Compare the role as literal text.  Interpolating it into a
	# regex would give characters such as "(", "[", "+" or "."
	# (which can show up in role text) a pattern meaning.
	next if ($role ne "" && index($entry->[1], $role) >= 0);

	$entry->[1] = ($entry->[1] eq "") ? "$role" : "$entry->[1],$role";
    }
}
1427
# Look up $bin in the ":"-separated directories of $ENV{PATH}.
# Returns "directory/$bin" for the first directory where that path
# exists, or "" when there is none.
sub which {
    my ($bin) = @_;

    my @candidates = map { "$_/$bin" } split(/:/, $ENV{PATH});
    foreach my $candidate (@candidates) {
	return $candidate if (-e $candidate);
    }

    return "";
}
1439
# Look up the configuration file $conf in ".", $ENV{HOME} and ".scripts",
# in that order.  Returns "directory/$conf" for the first directory where
# that path exists, or "" when there is none.
sub which_conf {
    my ($conf) = @_;

    my @candidates = map { "$_/$conf" } split(/:/, ".:$ENV{HOME}:.scripts");
    foreach my $candidate (@candidates) {
	return $candidate if (-e $candidate);
    }

    return "";
}
1451
# Map one email entry through the loaded mailmap data.
#
# $mailmap->{names} and $mailmap->{addresses} are looked up by the
# complete "name <address>" form when either table has such a key, and
# by the bare address otherwise.  Parts without a mapping are kept.
# Returns the mapped entry in "name <address>" form.
sub mailmap_email {
    my ($line) = @_;

    my ($name, $address) = parse_email($line);
    my $email = format_email($name, $address, 1);

    # An entry keyed by the complete form wins over the bare address.
    my $key = $address;
    if (exists $mailmap->{names}->{$email} ||
	exists $mailmap->{addresses}->{$email}) {
	$key = $email;
    }

    my $real_name = $name;
    my $real_address = $address;
    $real_name = $mailmap->{names}->{$key}
	if (exists $mailmap->{names}->{$key});
    $real_address = $mailmap->{addresses}->{$key}
	if (exists $mailmap->{addresses}->{$key});

    return format_email($real_name, $real_address, 1);
}
1478
# Map every given address through mailmap_email() and return the mapped
# list.  With $email_use_mailmap set, the mapped list is also handed to
# merge_by_realname() before it is returned.
sub mailmap {
    my (@addresses) = @_;

    my @mapped_emails = map { mailmap_email($_) } @addresses;

    if ($email_use_mailmap) {
	merge_by_realname(@mapped_emails);
    }

    return @mapped_emails;
}
1489
# Give every entry that carries the same real name the same address.
#
# The first address seen for a name is kept; later entries with that
# name are rewritten, IN PLACE, to use it.  The arguments are modified
# through @_ (whose elements alias the caller's variables), so this has
# to be called with an array or with modifiable scalars.  Copying @_
# into a private array first, as was done before, made all rewrites
# invisible to the caller.
#
# Entries without a name are left alone: they carry nothing to merge on
# and would otherwise all collapse onto the first nameless address.
sub merge_by_realname {
    my %address_map;

    foreach my $email (@_) {
	my ($name, $address) = parse_email($email);
	next if ($name eq "");

	if (exists $address_map{$name}) {
	    $email = format_email($name, $address_map{$name}, 1);
	} else {
	    $address_map{$name} = $address;
	}
    }
}
1504
# Run $cmd through the shell and return its output as a list of lines,
# with leading whitespace removed from every line.
sub git_execute_cmd {
    my ($cmd) = @_;

    (my $output = `$cmd`) =~ s/^\s*//gm;
    my @lines = split("\n", $output);

    return @lines;
}
1515
# Run $cmd through the shell and return its output as a list of lines.
# The lines are returned unmodified (leading whitespace is kept).
sub hg_execute_cmd {
    my ($cmd) = @_;

    my @lines = split("\n", `$cmd`);

    return @lines;
}
1525
# Split "type: signer" lines into two parallel lists.
#
# Returns two array references: the text before the last ":" of every
# line (the signature type) and the text after it, the latter passed
# through deduplicate_email() to normalise badly written signatures.
sub extract_formatted_signatures {
    my (@signature_lines) = @_;

    my @type = map {
	(my $kind = $_) =~ s/\s*(.*):.*/$1/;
	$kind;
    } @signature_lines;

    # cut -f2- -d":", then reformat the address (with name)
    my @signers = map {
	(my $signer = $_) =~ s/\s*.*:\s*(.+)\s*/$1/;
	$signer;
    } @signature_lines;
    foreach my $signer (@signers) {
	$signer = deduplicate_email($signer);
    }

    return (\@type, \@signers);
}
1544
# Run a VCS log command and pick commit count, signers, authors and
# stat lines out of its output.
#
# $cmd is executed with the "execute_cmd" handler of %VCS_cmds; $file is
# only used when the "stat_pattern" is interpolated (see below).
# Returns the list ($commits, \@signers, \@authors, \@stats).  When no
# signature line is found, the count is 0 and the author lines are
# returned unformatted.
sub vcs_find_signers {
    my ($cmd, $file) = @_;

    my @lines = $VCS_cmds{"execute_cmd"}->($cmd);

    my $pattern = $VCS_cmds{"commit_pattern"};
    my $author_pattern = $VCS_cmds{"author_pattern"};
    my $stat_pattern = $VCS_cmds{"stat_pattern"};

    # The stored pattern refers to variables by name (e.g. $file); the
    # double eval replaces each "$name" with that variable's value.
    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern

    my $commits = grep(/$pattern/, @lines);	# of commits
    my @authors = grep(/$author_pattern/, @lines);
    my @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
    my @stats = grep(/$stat_pattern/, @lines);

    return (0, \@signatures, \@authors, \@stats) if !@signatures;

    if ($interactive) {
	save_commits_by_author(@lines);
	save_commits_by_signer(@lines);
    }

    @signatures = grep(!/${penguin_chiefs}/i, @signatures)
	if (!$email_git_penguin_chiefs);

    # Only the second list (the formatted names) is of interest here.
    my (undef, $authors_ref) = extract_formatted_signatures(@authors);
    my (undef, $signers_ref) = extract_formatted_signatures(@signatures);

    return ($commits, $signers_ref, $authors_ref, \@stats);
}
1583
# Run a VCS command and return the formatted authors found in its
# output.
#
# Lines matching ${penguin_chiefs} are dropped unless
# $email_git_penguin_chiefs is set.  Every remaining line matching the
# "author_pattern" of %VCS_cmds contributes one "name <address>" entry.
# In interactive mode the commits are also recorded per author/signer.
sub vcs_find_author {
    my ($cmd) = @_;

    my @lines = $VCS_cmds{"execute_cmd"}->($cmd);

    @lines = grep(!/${penguin_chiefs}/i, @lines)
	if (!$email_git_penguin_chiefs);

    return @lines if !@lines;

    my @authors = ();
    foreach my $line (@lines) {
	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
	    my ($name, $address) = parse_email($1);
	    push(@authors, format_email($name, $address, 1));
	}
    }

    if ($interactive) {
	save_commits_by_author(@lines);
	save_commits_by_signer(@lines);
    }

    return @authors;
}
1611
# Run a blame command and return, in output order, the first capture of
# the "blame_commit_pattern" for every output line that matches it.
sub vcs_save_commits {
    my ($cmd) = @_;

    my @lines = $VCS_cmds{"execute_cmd"}->($cmd);

    my @commits = map {
	m/$VCS_cmds{"blame_commit_pattern"}/ ? ($1) : ();
    } @lines;

    return @commits;
}
1627
# Return the commits that blame reports for $file (one per blamed line).
#
# Without entries in @range the whole file is blamed.  With them, only
# the "file:start:length" ranges that name $file are used: through the
# "blame_range_cmd" when the VCS has one, otherwise by blaming the whole
# file and picking the range's lines out of the result.
# A leading "^" is removed from the returned commits.  An empty list is
# returned when $file is not a plain file.
#
# NOTE: the command templates are interpolated with a double eval and
# refer to the local variables by name, so $file, $diff_start and
# $diff_length must keep their names.
sub vcs_blame {
    my ($file) = @_;
    my $cmd;
    my @commits = ();

    return @commits if (!(-f $file));

    if (!@range) {
	$cmd = $VCS_cmds{"blame_file_cmd"};
	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
	@commits = vcs_save_commits($cmd);
    } elsif ($VCS_cmds{"blame_range_cmd"} eq "") {
	$cmd = $VCS_cmds{"blame_file_cmd"};
	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
	my @all_commits = vcs_save_commits($cmd);

	foreach my $file_range_diff (@range) {
	    my ($diff_file, $diff_start, $diff_length) =
		$file_range_diff =~ m/(.+):(.+):(.+)/ or next;
	    next if ("$file" ne "$diff_file");
	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
		push(@commits, $all_commits[$i]);
	    }
	}
    } else {
	foreach my $file_range_diff (@range) {
	    my ($diff_file, $diff_start, $diff_length) =
		$file_range_diff =~ m/(.+):(.+):(.+)/ or next;
	    next if ("$file" ne "$diff_file");
	    $cmd = $VCS_cmds{"blame_range_cmd"};
	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
	    push(@commits, vcs_save_commits($cmd));
	}
    }

    s/^\^//g for (@commits);

    return @commits;
}
1675
# Set once the "no VCS" hint has been printed, so it appears only once.
my $printed_novcs = 0;

# Select the VCS command table to use.
#
# The git table is tried first, then the mercurial one; the first table
# whose "available" expression evaluates to true is left in %VCS_cmds.
# Returns 1 for git, 2 for mercurial and 0 (with %VCS_cmds emptied and,
# the first time only, a hint on STDERR) when neither is available.
sub vcs_exists {
    my $vcs_id = 0;

    foreach my $cmds_ref (\%VCS_cmds_git, \%VCS_cmds_hg) {
	$vcs_id++;
	%VCS_cmds = %{$cmds_ref};
	return $vcs_id if eval $VCS_cmds{"available"};
    }

    %VCS_cmds = ();
    return 0 if ($printed_novcs);

    my @hint = (
	"$P: No supported VCS found.  Add --nogit to options?\n",
	"Using a git repository produces better results.\n",
	"Try Linus Torvalds' latest git repository using:\n",
	"git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n",
    );
    foreach my $message (@hint) {
	warn($message);
    }
    $printed_novcs = 1;

    return 0;
}
1692
# True when $vcs_used holds 1.  vcs_exists() is called first, which
# (re)loads %VCS_cmds; its return value is not used here.
sub vcs_is_git {
    vcs_exists();

    return ($vcs_used == 1);
}
1697
# True when $vcs_used holds 2.
sub vcs_is_hg {
    return ($vcs_used == 2);
}
1701
# Interactive menu for choosing which entries of a maintainer list to
# keep.
#
# $list_ref refers to a list of [ email, role ] pairs.  The list is
# printed to STDERR with a selection marker per entry; commands are read
# from STDIN (several per line, separated by commas or spaces) until the
# selection is approved with "y".  Approving also clears $interactive,
# $output_rolestats and $output_roles.
#
# Commands that change how the list is computed (git/blame options,
# limits, mailmap, keywords, ...) re-enter get_maintainers via
# "goto &get_maintainers", so this sub does not return in that case.
#
# End of input on STDIN is treated like "y": nothing more can be read,
# so prompting again would only loop forever.
#
# Returns the entries that are still selected.
sub interactive_get_maintainers {
    my ($list_ref) = @_;
    my @list = @$list_ref;

    vcs_exists();

    # Per-entry state, keyed by the 0-based position in @list:
    #   %selected  entry is kept in the result
    #   %authored  show the commits authored by this entry
    #   %signed    show the commits signed by this entry
    my %selected;
    my %authored;
    my %signed;
    my $count = 0;
    my $maintained = 0;
    foreach my $entry (@list) {
	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
	$selected{$count} = 1;
	$authored{$count} = 0;
	$signed{$count} = 0;
	$count++;
    }

    #menu loop
    my $done = 0;
    my $print_options = 0;
    my $redraw = 1;
    while (!$done) {
	$count = 0;
	if ($redraw) {
	    printf STDERR "\n%1s %2s %-65s",
			  "*", "#", "email/list and role:stats";
	    if ($email_git ||
		($email_git_fallback && !$maintained) ||
		$email_git_blame) {
		print STDERR "auth sign";
	    }
	    print STDERR "\n";
	    foreach my $entry (@list) {
		my $email = $entry->[0];
		my $role = $entry->[1];
		my $sel = "";
		$sel = "*" if ($selected{$count});
		my $commit_author = $commit_author_hash{$email};
		my $commit_signer = $commit_signer_hash{$email};
		my $authored = 0;
		my $signed = 0;
		$authored++ for (@{$commit_author});
		$signed++ for (@{$commit_signer});
		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
		printf STDERR "%4d %4d", $authored, $signed
		    if ($authored > 0 || $signed > 0);
		printf STDERR "\n     %s\n", $role;
		if ($authored{$count}) {
		    my $commit_author = $commit_author_hash{$email};
		    foreach my $ref (@{$commit_author}) {
			print STDERR "     Author: @{$ref}[1]\n";
		    }
		}
		if ($signed{$count}) {
		    my $commit_signer = $commit_signer_hash{$email};
		    foreach my $ref (@{$commit_signer}) {
			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
		    }
		}

		$count++;
	    }
	}
	my $date_ref = \$email_git_since;
	$date_ref = \$email_hg_since if (vcs_is_hg());
	if ($print_options) {
	    $print_options = 0;
	    if (vcs_exists()) {
		print STDERR <<EOT

Version Control options:
g  use git history      [$email_git]
gf use git-fallback     [$email_git_fallback]
b  use git blame        [$email_git_blame]
bs use blame signatures [$email_git_blame_signatures]
c# minimum commits      [$email_git_min_signatures]
%# min percent          [$email_git_min_percent]
d# history to use       [$$date_ref]
x# max maintainers      [$email_git_max_maintainers]
t  all signature types  [$email_git_all_signature_types]
m  use .mailmap         [$email_use_mailmap]
EOT
	    }
	    print STDERR <<EOT

Additional options:
0  toggle all
tm toggle maintainers
tg toggle git entries
tl toggle open list entries
ts toggle subscriber list entries
f  emails in file       [$file_emails]
k  keywords in file     [$keywords]
r  remove duplicates    [$email_remove_duplicates]
p# pattern match depth  [$pattern_depth]
EOT
	}
	print STDERR
"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";

	my $input = <STDIN>;
	# End of input: approve the current selection instead of
	# prompting forever on an exhausted STDIN.
	$input = "y" if (!defined($input));
	chomp($input);

	$redraw = 1;
	my $rerun = 0;
	my @wish = split(/[, ]+/, $input);
	foreach my $nr (@wish) {
	    $nr = lc($nr);
	    # First character selects the command, the rest is its
	    # argument; $val is that argument when it is a plain number.
	    my $sel = substr($nr, 0, 1);
	    my $str = substr($nr, 1);
	    my $val = 0;
	    $val = $1 if $str =~ /^(\d+)$/;

	    if ($sel eq "y") {
		$interactive = 0;
		$done = 1;
		$output_rolestats = 0;
		$output_roles = 0;
		last;
	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
		$selected{$nr - 1} = !$selected{$nr - 1};
	    } elsif ($sel eq "*" || $sel eq '^') {
		my $toggle = 0;
		$toggle = 1 if ($sel eq '*');
		for (my $i = 0; $i < $count; $i++) {
		    $selected{$i} = $toggle;
		}
	    } elsif ($sel eq "0") {
		for (my $i = 0; $i < $count; $i++) {
		    $selected{$i} = !$selected{$i};
		}
	    } elsif ($sel eq "t") {
		if (lc($str) eq "m") {
		    for (my $i = 0; $i < $count; $i++) {
			$selected{$i} = !$selected{$i}
			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
		    }
		} elsif (lc($str) eq "g") {
		    for (my $i = 0; $i < $count; $i++) {
			$selected{$i} = !$selected{$i}
			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
		    }
		} elsif (lc($str) eq "l") {
		    for (my $i = 0; $i < $count; $i++) {
			$selected{$i} = !$selected{$i}
			    if ($list[$i]->[1] =~ /^(open list)/i);
		    }
		} elsif (lc($str) eq "s") {
		    for (my $i = 0; $i < $count; $i++) {
			$selected{$i} = !$selected{$i}
			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
		    }
		} elsif ($str eq "") {
		    # Plain "t" is the "all signature types" switch from
		    # the options menu.  It has to be handled here: a
		    # separate branch testing for "t" further down this
		    # chain could never be reached.
		    bool_invert(\$email_git_all_signature_types);
		    $rerun = 1;
		}
	    } elsif ($sel eq "a") {
		if ($val > 0 && $val <= $count) {
		    $authored{$val - 1} = !$authored{$val - 1};
		} elsif ($str eq '*' || $str eq '^') {
		    my $toggle = 0;
		    $toggle = 1 if ($str eq '*');
		    for (my $i = 0; $i < $count; $i++) {
			$authored{$i} = $toggle;
		    }
		}
	    } elsif ($sel eq "s") {
		if ($val > 0 && $val <= $count) {
		    $signed{$val - 1} = !$signed{$val - 1};
		} elsif ($str eq '*' || $str eq '^') {
		    my $toggle = 0;
		    $toggle = 1 if ($str eq '*');
		    for (my $i = 0; $i < $count; $i++) {
			$signed{$i} = $toggle;
		    }
		}
	    } elsif ($sel eq "o") {
		$print_options = 1;
		$redraw = 1;
	    } elsif ($sel eq "g") {
		if ($str eq "f") {
		    bool_invert(\$email_git_fallback);
		} else {
		    bool_invert(\$email_git);
		}
		$rerun = 1;
	    } elsif ($sel eq "b") {
		if ($str eq "s") {
		    bool_invert(\$email_git_blame_signatures);
		} else {
		    bool_invert(\$email_git_blame);
		}
		$rerun = 1;
	    } elsif ($sel eq "c") {
		if ($val > 0) {
		    $email_git_min_signatures = $val;
		    $rerun = 1;
		}
	    } elsif ($sel eq "x") {
		if ($val > 0) {
		    $email_git_max_maintainers = $val;
		    $rerun = 1;
		}
	    } elsif ($sel eq "%") {
		if ($str ne "" && $val >= 0) {
		    $email_git_min_percent = $val;
		    $rerun = 1;
		}
	    } elsif ($sel eq "d") {
		if (vcs_is_git()) {
		    $email_git_since = $str;
		} elsif (vcs_is_hg()) {
		    $email_hg_since = $str;
		}
		$rerun = 1;
	    } elsif ($sel eq "f") {
		bool_invert(\$file_emails);
		$rerun = 1;
	    } elsif ($sel eq "r") {
		bool_invert(\$email_remove_duplicates);
		$rerun = 1;
	    } elsif ($sel eq "m") {
		bool_invert(\$email_use_mailmap);
		read_mailmap();
		$rerun = 1;
	    } elsif ($sel eq "k") {
		bool_invert(\$keywords);
		$rerun = 1;
	    } elsif ($sel eq "p") {
		if ($str ne "" && $val >= 0) {
		    $pattern_depth = $val;
		    $rerun = 1;
		}
	    } elsif ($sel eq "h" || $sel eq "?") {
		print STDERR <<EOT

Interactive mode allows you to select the various maintainers, submitters,
commit signers and mailing lists that could be CC'd on a patch.

Any *'d entry is selected.

If you have git or hg installed, you can choose to summarize the commit
history of files in the patch.  Also, each line of the current file can
be matched to its commit author and that commits signers with blame.

Various knobs exist to control the length of time for active commit
tracking, the maximum number of commit authors and signers to add,
and such.

Enter selections at the prompt until you are satisfied that the selected
maintainers are appropriate.  You may enter multiple selections separated
by either commas or spaces.

EOT
	    } else {
		print STDERR "invalid option: '$nr'\n";
		$redraw = 0;
	    }
	}
	if ($rerun) {
	    print STDERR "git-blame can be very slow, please have patience..."
		if ($email_git_blame);
	    # Recompute the list with the changed options; control does
	    # not come back to this sub.
	    goto &get_maintainers;
	}
    }

    #drop not selected entries
    my @new_emailto = ();
    for (my $i = 0; $i < @list; $i++) {
	push(@new_emailto, $list[$i]) if ($selected{$i});
    }
    return @new_emailto;
}
1981
# Flip the flag that $bool_ref refers to: a true value becomes 0, a
# false value becomes 1.
sub bool_invert {
    my ($bool_ref) = @_;

    $$bool_ref = $$bool_ref ? 0 : 1;
}
1991
# Normalise an email entry and fold it onto an earlier entry of the same
# person.
#
# The entry is reformatted and mapped through the mailmap.  When
# $email_remove_duplicates is set, the first name/address pair seen for
# a name (or, failing that, for an address) is remembered in
# %deduplicate_name_hash / %deduplicate_address_hash, and later entries
# with that name or address are replaced by the remembered pair.
# Returns the resulting entry in "name <address>" form.
sub deduplicate_email {
    my ($email) = @_;

    my ($name, $address) = parse_email($email);
    $email = mailmap_email(format_email($name, $address, 1));

    return $email if (!$email_remove_duplicates);

    ($name, $address) = parse_email($email);

    # A match on the name takes precedence over a match on the address.
    my $known;
    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
	$known = $deduplicate_name_hash{lc($name)};
    } elsif ($deduplicate_address_hash{lc($address)}) {
	$known = $deduplicate_address_hash{lc($address)};
    }

    if ($known) {
	($name, $address) = ($known->[0], $known->[1]);
    } else {
	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
    }

    return mailmap_email(format_email($name, $address, 1));
}
2021
# Record, per author, the commits found in a block of VCS log output.
#
# Authors, commit ids and subjects are collected from @lines with the
# matching patterns of %VCS_cmds and paired up by position.  Each
# [ commit, subject ] pair is appended to the author's list in
# %commit_author_hash unless an equal pair is already there.
sub save_commits_by_author {
    my (@lines) = @_;

    my (@authors, @commits, @subjects);

    foreach my $line (@lines) {
	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
	    push(@authors, deduplicate_email($1));
	}
	if ($line =~ m/$VCS_cmds{"commit_pattern"}/) {
	    push(@commits, $1);
	}
	if ($line =~ m/$VCS_cmds{"subject_pattern"}/) {
	    push(@subjects, $1);
	}
    }

  AUTHOR:
    foreach my $i (0 .. $#authors) {
	my $saved = \@{$commit_author_hash{$authors[$i]}};

	foreach my $ref (@{$saved}) {
	    next AUTHOR
		if ($ref->[0] eq $commits[$i] && $ref->[1] eq $subjects[$i]);
	}
	push(@{$saved}, [ $commits[$i], $subjects[$i] ]);
    }
}
2054
# Record, per signer, the commits found in a block of VCS log output.
#
# The most recent commit id and subject seen while scanning @lines are
# attached to every signature line that follows them.  Each
# [ commit, subject, signature type ] triple is appended to the signer's
# list in %commit_signer_hash unless an equal triple is already there.
sub save_commits_by_signer {
    my (@lines) = @_;

    my ($commit, $subject) = ("", "");

    foreach my $line (@lines) {
	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);

	next if ($line !~ /^[ \t]*${signature_pattern}.*\@.*$/);

	my ($types_ref, $signers_ref) = extract_formatted_signatures($line);
	my $type = $types_ref->[0];
	my $signer = deduplicate_email($signers_ref->[0]);

	my $saved = \@{$commit_signer_hash{$signer}};
	my $exists = 0;
	foreach my $ref (@{$saved}) {
	    next if ($ref->[0] ne $commit);
	    next if ($ref->[1] ne $subject);
	    next if ($ref->[2] ne $type);
	    $exists = 1;
	    last;
	}
	push(@{$saved}, [ $commit, $subject, $type ]) if (!$exists);
    }
}
2091
# Turn a list of email entries (one per occurrence) into output entries
# with the given role.
#
# The entries are mapped through mailmap(), counted, and visited from
# the most to the least frequent.  Entries rejected by
# ignore_email_address() are skipped.  The walk stops at the first entry
# that has fewer than $email_git_min_signatures occurrences, would
# exceed $email_git_max_maintainers entries, or falls below
# $email_git_min_percent of $divisor.  Accepted entries are added with
# push_email_address() and tagged via add_role(), including the
# "count/divisor=percent%" statistics when $output_rolestats is set.
# A $divisor <= 0 is reported with a warning and replaced by 1.
sub vcs_assign {
    my ($role, $divisor, @lines) = @_;

    return if (@lines <= 0);

    if ($divisor <= 0) {
	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
	$divisor = 1;
    }

    @lines = mailmap(@lines);

    return if (@lines <= 0);

    # sort | uniq -c
    my %hash;
    foreach my $line (sort(@lines)) {
	$hash{$line}++;
    }

    # sort -rn
    my @by_count = sort {$hash{$b} <=> $hash{$a}} keys %hash;

    my $count = 0;
    foreach my $line (@by_count) {
	my $sign_offs = $hash{$line};
	my $percent = $sign_offs * 100 / $divisor;
	$percent = 100 if ($percent > 100);

	next if (ignore_email_address($line));

	$count++;
	last if ($sign_offs < $email_git_min_signatures);
	last if ($count > $email_git_max_maintainers);
	last if ($percent < $email_git_min_percent);

	push_email_address($line, '');

	my $shown_role = $role;
	if ($output_rolestats) {
	    my $fmt_percent = sprintf("%.0f", $percent);
	    $shown_role = "$role:$sign_offs/$divisor=$fmt_percent%";
	}
	add_role($line, $shown_role);
    }
}
2134
# Add the commit signers and authors of $file, as found in the VCS
# history, to the output.
#
# Nothing is done when no VCS is available.  Signers and authors are
# handed to vcs_assign() with the number of commits as divisor.  When
# there is exactly one stat line per author line, the added and removed
# line counts are also attributed to the authors ("added_lines" /
# "removed_lines").
#
# NOTE: the command and pattern templates are interpolated with a double
# eval and refer to variables by name, so $file must keep its name.
sub vcs_file_signoffs {
    my ($file) = @_;

    $vcs_used = vcs_exists();
    return if (!$vcs_used);

    my $cmd = $VCS_cmds{"find_signers_cmd"};
    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd

    my ($commits, $signers_ref, $authors_ref, $stats_ref) =
	vcs_find_signers($cmd, $file);

    my @signers = defined($signers_ref) ? @{$signers_ref} : ();
    my @authors = defined($authors_ref) ? @{$authors_ref} : ();
    my @stats = defined($stats_ref) ? @{$stats_ref} : ();

#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");

    foreach my $signer (@signers) {
	$signer = deduplicate_email($signer);
    }

    vcs_assign("commit_signer", $commits, @signers);
    vcs_assign("authored", $commits, @authors);

    # Line statistics can only be attributed when authors and stat
    # lines pair up one to one.
    return if ($#authors != $#stats);

    my $stat_pattern = $VCS_cmds{"stat_pattern"};
    $stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern

    # Totals over all commits: the divisors for the percentages.
    my ($added, $deleted) = (0, 0);
    foreach my $stat (@stats) {
	if ($stat =~ /$stat_pattern/) {
	    $added += $1;
	    $deleted += $2;
	}
    }

    my @tmp_authors = uniq(@authors);
    foreach my $author (@tmp_authors) {
	$author = deduplicate_email($author);
    }
    @tmp_authors = uniq(@tmp_authors);

    # One list entry per added/removed line, so that vcs_assign() can
    # count lines the same way it counts commits.
    my @list_added = ();
    my @list_deleted = ();
    foreach my $author (@tmp_authors) {
	my ($auth_added, $auth_deleted) = (0, 0);
	foreach my $i (0 .. $#stats) {
	    if ($author eq deduplicate_email($authors[$i]) &&
		$stats[$i] =~ /$stat_pattern/) {
		$auth_added += $1;
		$auth_deleted += $2;
	    }
	}
	push(@list_added, ($author) x $auth_added);
	push(@list_deleted, ($author) x $auth_deleted);
    }

    vcs_assign("added_lines", $added, @list_added);
    vcs_assign("removed_lines", $deleted, @list_deleted);
}
2206
# Derive signer and author statistics for $file from VCS blame output and
# record them through vcs_assign().
#
# - With $email_git_blame_signatures set, the signers of every blamed commit
#   are collected (one combined query for hg, one query per commit otherwise).
# - With both $from_filename and $output_rolestats set, commit authors are
#   looked up as well.  In the non-hg case each author is recorded once per
#   blamed line under "authored lines"; in the hg case the authors are added
#   to the signer list instead.
# - The signers are finally assigned as "commits" ($from_filename) or as
#   "modified commits".
#
# Sets the file-level $vcs_used flag and returns early when vcs_exists()
# reports no usable VCS.
#
# NOTE: the %VCS_cmds templates are expanded with s/(\$\w+)/$1/eeg, which
# evaluates every "$name" token as a Perl variable visible at that point.
# $file and $commit must therefore keep their names if a template refers to
# them (the templates themselves are defined elsewhere in the file).
sub vcs_file_blame {
    my ($file) = @_;

    $vcs_used = vcs_exists();
    return if (!$vcs_used);

    my @all_commits = vcs_blame($file);
    my @commits = uniq(@all_commits);
    my $total_commits = @commits;
    my $total_lines = @all_commits;
    my @signers = ();

    if ($email_git_blame_signatures) {
	# hg is asked about all revisions in one command ("a -r b -r c");
	# any other VCS is queried once per commit.
	my @revisions = vcs_is_hg() ? (join(" -r ", @commits)) : @commits;

	foreach my $commit (@revisions) {
	    my $cmd = $VCS_cmds{"find_commit_signers_cmd"};
	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd

	    # Only the signer list (second return value) is of interest here.
	    my (undef, $commit_signers_ref) = vcs_find_signers($cmd, $file);
	    push(@signers, @{$commit_signers_ref})
		if defined $commit_signers_ref;
	}
    }

    if ($from_filename && $output_rolestats) {
	my @blame_signers;
	if (vcs_is_hg()) {
	    @commits = uniq(@commits);
	    @commits = sort(@commits);
	    my $commit = join(" -r ", @commits);

	    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd

	    my @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
	    if (!$email_git_penguin_chiefs) {
		@lines = grep(!/${penguin_chiefs}/i, @lines);
	    }

	    # Nothing left to attribute when the command produced no output.
	    if (@lines) {
		my @authors = ();
		foreach my $line (@lines) {
		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
			my $raw_author = $1;
			push(@authors, scalar(deduplicate_email($raw_author)));
		    }
		}

		save_commits_by_author(@lines) if ($interactive);
		save_commits_by_signer(@lines) if ($interactive);

		push(@signers, @authors);
	    }
	} else {
	    foreach my $commit (@commits) {
		my $cmd = $VCS_cmds{"find_commit_author_cmd"};
		$cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
		my @author = vcs_find_author($cmd);
		next if !@author;

		my $formatted_author = deduplicate_email($author[0]);

		# One entry per blamed line that belongs to this commit.
		my $count = grep(/$commit/, @all_commits);
		push(@blame_signers, ($formatted_author) x $count);
	    }
	}
	if (@blame_signers) {
	    vcs_assign("authored lines", $total_lines, @blame_signers);
	}
    }

    foreach my $idx (0 .. $#signers) {
	$signers[$idx] = deduplicate_email($signers[$idx]);
    }
    vcs_assign($from_filename ? "commits" : "modified commits",
	       $total_commits, @signers);
}
2334
# Ask the VCS whether it knows about $file.
#
# Returns 0 when no VCS is available or when the "file_exists_cmd" command
# exits with a non-zero status; otherwise returns whatever the execute_cmd
# handler yields in scalar context for that command.
#
# NOTE: the command template is expanded with s///ee, so $file must keep its
# name if the template refers to it.
sub vcs_file_exists {
    my ($file) = @_;

    return 0 if (!vcs_exists());

    my $cmd = $VCS_cmds{"file_exists_cmd"};
    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
    $cmd .= " 2>&1";			# fold stderr into the captured output

    my $exists = $VCS_cmds{"execute_cmd"}->($cmd);

    # $? still holds the status of the command that was just executed
    return ($? != 0) ? 0 : $exists;
}
2352
# List the files the VCS tracks for $file (the "list_files_cmd" template).
#
# Returns the output lines of the command.  On failure the result is the
# empty list in list context:
#   - no usable VCS: () in list context, 0 in scalar context (previously the
#     one-element list (0) leaked to list-context callers, where it looked
#     like a file named "0");
#   - the command exits non-zero: ().
#
# NOTE: the command template is expanded with s///ee, so $file must keep its
# name if the template refers to it.
sub vcs_list_files {
    my ($file) = @_;

    if (!vcs_exists()) {
	return wantarray ? () : 0;
    }

    my $cmd = $VCS_cmds{"list_files_cmd"};
    $cmd =~ s/(\$\w+)/$1/eeg;   # interpolate $cmd
    my @lsfiles = &{$VCS_cmds{"execute_cmd"}}($cmd);

    # $? still holds the status of the command that was just executed
    return () if ($? != 0);

    return @lsfiles;
}
2369
# Return the arguments with duplicates removed, keeping the first occurrence
# of each value in its original position.
sub uniq {
    my (@parms) = @_;

    my %seen;
    my @firsts;
    foreach my $item (@parms) {
	next if ($seen{$item}++);
	push(@firsts, $item);
    }
    return @firsts;
}
2377
# Return the arguments sorted with Perl's default (string) ordering and with
# duplicates removed.
sub sort_and_uniq {
    my (@parms) = @_;

    my %seen;
    my @sorted_unique = grep { !$seen{$_}++ } sort(@parms);
    return @sorted_unique;
}
2386
# Tidy up free-form "name + address" strings and return them formatted by
# format_email().
#
# For each input string:
#   - every address, optionally wrapped in (), <> or {}, is rewritten as
#     <address>;
#   - the string is split into name and address by parse_email();
#   - a name that is nothing but a quoted comma or period is discarded;
#   - a name of more than two parts is cut down to its last two parts, or to
#     its last three when the first or middle of those looks like an initial;
#   - one leading and one trailing comma/period is stripped, also when it
#     sits just inside a surrounding double quote.
#
# Returns the list of formatted addresses, in input order.
sub clean_file_emails {
    my (@file_emails) = @_;
    my @fmt_emails = ();

    foreach my $email (@file_emails) {
	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
	my ($name, $address) = parse_email($email);

	# This used to be a string comparison against the literal text
	# '"[,\.]"', which can only be true for that exact seven-character
	# name; a pattern match for a quoted lone comma or period is what
	# the character class expresses.
	if ($name =~ /^"[,\.]"$/) {
	    $name = "";
	}

	# Split the name on anything that cannot be part of a name word
	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
	if (@nw > 2) {
	    my $first = $nw[@nw - 3];
	    my $middle = $nw[@nw - 2];
	    my $last = $nw[@nw - 1];

	    # An initial is a single letter or a character followed by "."
	    # (for the middle part any single character counts).
	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
		 (length($first) == 2 && substr($first, -1) eq ".")) ||
		(length($middle) == 1 ||
		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
		$name = "$first $middle $last";
	    } else {
		$name = "$middle $last";
	    }
	}

	# Drop a trailing comma/period, keeping a closing quote if present
	if (substr($name, -1) =~ /[,\.]/) {
	    $name = substr($name, 0, length($name) - 1);
	} elsif (substr($name, -2) =~ /[,\.]"/) {
	    $name = substr($name, 0, length($name) - 2) . '"';
	}

	# Drop a leading comma/period, keeping an opening quote if present
	if (substr($name, 0, 1) =~ /[,\.]/) {
	    $name = substr($name, 1, length($name) - 1);
	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
	    $name = '"' . substr($name, 2, length($name) - 2);
	}

	my $fmt_email = format_email($name, $address, $email_usename);
	push(@fmt_emails, $fmt_email);
    }
    return @fmt_emails;
}
2431
# Collapse a list of [address, role] pairs into one output line per distinct
# address, keeping the first occurrence of each address.
#
# With $output_roles set a line reads "address (role)", using the role of
# that first occurrence; otherwise it is just the address.
sub merge_email {
    my %seen;
    my @merged;

    foreach my $entry (@_) {
	my ($address, $role) = @{$entry};
	next if ($seen{$address});

	$seen{$address} = 1;
	push(@merged, $output_roles ? "$address ($role)" : $address);
    }

    return @merged;
}
2450
# Print the given strings to standard output: joined by $output_separator on
# a single line, or one per line when $output_multiline is set.
sub output {
    my (@parms) = @_;

    if (!$output_multiline) {
	print(join($output_separator, @parms));
	print("\n");
    } else {
	print("$_\n") foreach (@parms);
    }
}
2463
# Cache for the pattern string returned by make_rfc822re(); it is filled in
# lazily on the first call to rfc822_valid() or rfc822_validlist().
my $rfc822re;
2465
# Build and return, as a string, the regular expression source that matches
# a single address (a mailbox or a group), optionally preceded by linear
# white space.  The pattern is assembled bottom-up from smaller fragments and
# uses only non-capturing groups.  It depends on the file-level $rfc822_lwsp
# fragment, which is defined outside this sub.
sub make_rfc822re {
#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
#   This regexp will only work on addresses which have had comments stripped
#   and replaced with rfc822_lwsp.

#   Characters that may not appear in an atom: the specials and the control
#   characters (octal 000-037 and 177), written for use inside a [...] class.
    my $specials = '()<>@,;:\\\\".\\[\\]';
    my $controls = '\\000-\\037\\177';

#   A domain literal is a bracketed run of dtext (anything but brackets, CR
#   and backslash) or backslash-escaped characters.
    my $dtext = "[^\\[\\]\\r\\\\]";
    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";

#   A quoted string may contain escaped characters and linear white space.
    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";

#   Use zero-width assertion to spot the limit of an atom.  A simple
#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
    my $word = "(?:$atom|$quoted_string)";
    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";

#   A domain is a dot-separated sequence of atoms and/or domain literals.
    my $sub_domain = "(?:$atom|$domain_literal)";
    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";

    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";

#   A mailbox is either a bare addr_spec or an optional phrase followed by an
#   angle-bracketed address with an optional source route.
    my $phrase = "$word*";
    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";

#   A group is "phrase: mailbox, mailbox, ...;" with a possibly empty list.
    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
    my $address = "(?:$mailbox|$group)";

    return "$rfc822_lwsp*$address";
}
2501
# Return a copy of the argument in which every parenthesised comment that
# lies outside a double-quoted string has been replaced by a single space.
#
# Each substitution removes one comment that contains no further
# parentheses, so nested comments disappear from the inside out as the
# substitution is repeated until it no longer matches.  The simpler regexps
# in the Email Addressing FAQ are imperfect - they will miss escaped chars
# in atoms, for example.
sub rfc822_strip_comments {
    my ($s) = @_;

    1 while ($s =~ s/^((?:[^"\\]|\\.)*
                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
                    \((?:[^()\\]|\\.)*\)/$1 /osx);

    return $s;
}
2513
#   valid: returns true if the parameter, once its comments have been
#          stripped, is a single RFC822 valid address made up solely of
#          characters matching $rfc822_char.
#
sub rfc822_valid {
    my ($candidate) = @_;
    my $s = rfc822_strip_comments($candidate);

    # Build the address pattern on first use and keep it for later calls
    $rfc822re = make_rfc822re() unless ($rfc822re);

    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
}
2525
#   validlist: In scalar context, returns 1 if the parameter is an RFC822
#              valid, comma-separated list of addresses and 0 otherwise.
#
#              In list context, returns an empty list on failure (an invalid
#              address was found); otherwise a list whose first element is the
#              number of addresses found and whose remaining elements are the
#              addresses.  The leading count is needed to tell failure
#              (invalid) apart from success with no addresses found, because
#              an empty string is a valid list.

sub rfc822_validlist {
    my ($list) = @_;
    my $s = rfc822_strip_comments($list);

    # Build the address pattern on first use and keep it for later calls
    $rfc822re = make_rfc822re() unless ($rfc822re);

    # Null list items are valid according to the RFC, hence every address
    # in the list pattern is optional.
    my $well_formed = ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
		       $s =~ m/^$rfc822_char*$/);
    if (!$well_formed) {
        return wantarray ? () : 0;
    }

    # Pull out each address, skipping the separating comma and white space
    my @r = ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos);

    return wantarray ? (scalar(@r), @r) : 1;
}
2555