xref: /qemu/scripts/get_maintainer.pl (revision 2f28d2ff)
1#!/usr/bin/perl -w
2# (c) 2007, Joe Perches <joe@perches.com>
3#           created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
8# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
9#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use strict;
14
15my $P = $0;
16my $V = '0.26';
17
18use Getopt::Long qw(:config no_auto_abbrev);
19
20my $lk_path = "./";
21my $email = 1;
22my $email_usename = 1;
23my $email_maintainer = 1;
24my $email_list = 1;
25my $email_subscriber_list = 0;
26my $email_git_penguin_chiefs = 0;
27my $email_git = 0;
28my $email_git_all_signature_types = 0;
29my $email_git_blame = 0;
30my $email_git_blame_signatures = 1;
31my $email_git_fallback = 1;
32my $email_git_min_signatures = 1;
33my $email_git_max_maintainers = 5;
34my $email_git_min_percent = 5;
35my $email_git_since = "1-year-ago";
36my $email_hg_since = "-365";
37my $interactive = 0;
38my $email_remove_duplicates = 1;
39my $email_use_mailmap = 1;
40my $output_multiline = 1;
41my $output_separator = ", ";
42my $output_roles = 0;
43my $output_rolestats = 1;
44my $scm = 0;
45my $web = 0;
46my $subsystem = 0;
47my $status = 0;
48my $keywords = 1;
49my $sections = 0;
50my $file_emails = 0;
51my $from_filename = 0;
52my $pattern_depth = 0;
53my $version = 0;
54my $help = 0;
55
56my $vcs_used = 0;
57
58my $exit = 0;
59
60my %commit_author_hash;
61my %commit_signer_hash;
62
63my @penguin_chief = ();
64push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
65#Andrew wants in on most everything - 2009/01/14
66#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
67
68my @penguin_chief_names = ();
69foreach my $chief (@penguin_chief) {
70    if ($chief =~ m/^(.*):(.*)/) {
71	my $chief_name = $1;
72	my $chief_addr = $2;
73	push(@penguin_chief_names, $chief_name);
74    }
75}
76my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
77
78# Signature types of people who are either
79# 	a) responsible for the code in question, or
80# 	b) familiar enough with it to give relevant feedback
81my @signature_tags = ();
82push(@signature_tags, "Signed-off-by:");
83push(@signature_tags, "Reviewed-by:");
84push(@signature_tags, "Acked-by:");
85
86# rfc822 email address - preloaded methods go here.
87my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
88my $rfc822_char = '[\\000-\\377]';
89
90# VCS command support: class-like functions and strings
91
92my %VCS_cmds;
93
94my %VCS_cmds_git = (
95    "execute_cmd" => \&git_execute_cmd,
96    "available" => '(which("git") ne "") && (-d ".git")',
97    "find_signers_cmd" =>
98	"git log --no-color --since=\$email_git_since " .
99	    '--format="GitCommit: %H%n' .
100		      'GitAuthor: %an <%ae>%n' .
101		      'GitDate: %aD%n' .
102		      'GitSubject: %s%n' .
103		      '%b%n"' .
104	    " -- \$file",
105    "find_commit_signers_cmd" =>
106	"git log --no-color " .
107	    '--format="GitCommit: %H%n' .
108		      'GitAuthor: %an <%ae>%n' .
109		      'GitDate: %aD%n' .
110		      'GitSubject: %s%n' .
111		      '%b%n"' .
112	    " -1 \$commit",
113    "find_commit_author_cmd" =>
114	"git log --no-color " .
115	    '--format="GitCommit: %H%n' .
116		      'GitAuthor: %an <%ae>%n' .
117		      'GitDate: %aD%n' .
118		      'GitSubject: %s%n"' .
119	    " -1 \$commit",
120    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
121    "blame_file_cmd" => "git blame -l \$file",
122    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
123    "blame_commit_pattern" => "^([0-9a-f]+) ",
124    "author_pattern" => "^GitAuthor: (.*)",
125    "subject_pattern" => "^GitSubject: (.*)",
126);
127
128my %VCS_cmds_hg = (
129    "execute_cmd" => \&hg_execute_cmd,
130    "available" => '(which("hg") ne "") && (-d ".hg")',
131    "find_signers_cmd" =>
132	"hg log --date=\$email_hg_since " .
133	    "--template='HgCommit: {node}\\n" .
134	                "HgAuthor: {author}\\n" .
135			"HgSubject: {desc}\\n'" .
136	    " -- \$file",
137    "find_commit_signers_cmd" =>
138	"hg log " .
139	    "--template='HgSubject: {desc}\\n'" .
140	    " -r \$commit",
141    "find_commit_author_cmd" =>
142	"hg log " .
143	    "--template='HgCommit: {node}\\n" .
144		        "HgAuthor: {author}\\n" .
145			"HgSubject: {desc|firstline}\\n'" .
146	    " -r \$commit",
147    "blame_range_cmd" => "",		# not supported
148    "blame_file_cmd" => "hg blame -n \$file",
149    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
150    "blame_commit_pattern" => "^([ 0-9a-f]+):",
151    "author_pattern" => "^HgAuthor: (.*)",
152    "subject_pattern" => "^HgSubject: (.*)",
153);
154
155my $conf = which_conf(".get_maintainer.conf");
156if (-f $conf) {
157    my @conf_args;
158    open(my $conffile, '<', "$conf")
159	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
160
161    while (<$conffile>) {
162	my $line = $_;
163
164	$line =~ s/\s*\n?$//g;
165	$line =~ s/^\s*//g;
166	$line =~ s/\s+/ /g;
167
168	next if ($line =~ m/^\s*#/);
169	next if ($line =~ m/^\s*$/);
170
171	my @words = split(" ", $line);
172	foreach my $word (@words) {
173	    last if ($word =~ m/^#/);
174	    push (@conf_args, $word);
175	}
176    }
177    close($conffile);
178    unshift(@ARGV, @conf_args) if @conf_args;
179}
180
181if (!GetOptions(
182		'email!' => \$email,
183		'git!' => \$email_git,
184		'git-all-signature-types!' => \$email_git_all_signature_types,
185		'git-blame!' => \$email_git_blame,
186		'git-blame-signatures!' => \$email_git_blame_signatures,
187		'git-fallback!' => \$email_git_fallback,
188		'git-chief-penguins!' => \$email_git_penguin_chiefs,
189		'git-min-signatures=i' => \$email_git_min_signatures,
190		'git-max-maintainers=i' => \$email_git_max_maintainers,
191		'git-min-percent=i' => \$email_git_min_percent,
192		'git-since=s' => \$email_git_since,
193		'hg-since=s' => \$email_hg_since,
194		'i|interactive!' => \$interactive,
195		'remove-duplicates!' => \$email_remove_duplicates,
196		'mailmap!' => \$email_use_mailmap,
197		'm!' => \$email_maintainer,
198		'n!' => \$email_usename,
199		'l!' => \$email_list,
200		's!' => \$email_subscriber_list,
201		'multiline!' => \$output_multiline,
202		'roles!' => \$output_roles,
203		'rolestats!' => \$output_rolestats,
204		'separator=s' => \$output_separator,
205		'subsystem!' => \$subsystem,
206		'status!' => \$status,
207		'scm!' => \$scm,
208		'web!' => \$web,
209		'pattern-depth=i' => \$pattern_depth,
210		'k|keywords!' => \$keywords,
211		'sections!' => \$sections,
212		'fe|file-emails!' => \$file_emails,
213		'f|file' => \$from_filename,
214		'v|version' => \$version,
215		'h|help|usage' => \$help,
216		)) {
217    die "$P: invalid argument - use --help if necessary\n";
218}
219
220if ($help != 0) {
221    usage();
222    exit 0;
223}
224
225if ($version != 0) {
226    print("${P} ${V}\n");
227    exit 0;
228}
229
230if (-t STDIN && !@ARGV) {
231    # We're talking to a terminal, but have no command line arguments.
232    die "$P: missing patchfile or -f file - use --help if necessary\n";
233}
234
235$output_multiline = 0 if ($output_separator ne ", ");
236$output_rolestats = 1 if ($interactive);
237$output_roles = 1 if ($output_rolestats);
238
239if ($sections) {
240    $email = 0;
241    $email_list = 0;
242    $scm = 0;
243    $status = 0;
244    $subsystem = 0;
245    $web = 0;
246    $keywords = 0;
247    $interactive = 0;
248} else {
249    my $selections = $email + $scm + $status + $subsystem + $web;
250    if ($selections == 0) {
251	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
252    }
253}
254
255if ($email &&
256    ($email_maintainer + $email_list + $email_subscriber_list +
257     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
258    die "$P: Please select at least 1 email option\n";
259}
260
261if (!top_of_tree($lk_path)) {
262    die "$P: The current directory does not appear to be "
263	. "a QEMU source tree.\n";
264}
265
266## Read MAINTAINERS for type/value pairs
267
268my @typevalue = ();
269my %keyword_hash;
270
271open (my $maint, '<', "${lk_path}MAINTAINERS")
272    or die "$P: Can't open MAINTAINERS: $!\n";
273while (<$maint>) {
274    my $line = $_;
275
276    if ($line =~ m/^(\C):\s*(.*)/) {
277	my $type = $1;
278	my $value = $2;
279
280	##Filename pattern matching
281	if ($type eq "F" || $type eq "X") {
282	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
283	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
284	    $value =~ s/\?/\./g;         ##Convert ? to .
285	    ##if pattern is a directory and it lacks a trailing slash, add one
286	    if ((-d $value)) {
287		$value =~ s@([^/])$@$1/@;
288	    }
289	} elsif ($type eq "K") {
290	    $keyword_hash{@typevalue} = $value;
291	}
292	push(@typevalue, "$type:$value");
293    } elsif (!/^(\s)*$/) {
294	$line =~ s/\n$//g;
295	push(@typevalue, $line);
296    }
297}
298close($maint);
299
300
301#
302# Read mail address map
303#
304
305my $mailmap;
306
307read_mailmap();
308
309sub read_mailmap {
310    $mailmap = {
311	names => {},
312	addresses => {}
313    };
314
315    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
316
317    open(my $mailmap_file, '<', "${lk_path}.mailmap")
318	or warn "$P: Can't open .mailmap: $!\n";
319
320    while (<$mailmap_file>) {
321	s/#.*$//; #strip comments
322	s/^\s+|\s+$//g; #trim
323
324	next if (/^\s*$/); #skip empty lines
325	#entries have one of the following formats:
326	# name1 <mail1>
327	# <mail1> <mail2>
328	# name1 <mail1> <mail2>
329	# name1 <mail1> name2 <mail2>
330	# (see man git-shortlog)
331	if (/^(.+)<(.+)>$/) {
332	    my $real_name = $1;
333	    my $address = $2;
334
335	    $real_name =~ s/\s+$//;
336	    ($real_name, $address) = parse_email("$real_name <$address>");
337	    $mailmap->{names}->{$address} = $real_name;
338
339	} elsif (/^<([^\s]+)>\s*<([^\s]+)>$/) {
340	    my $real_address = $1;
341	    my $wrong_address = $2;
342
343	    $mailmap->{addresses}->{$wrong_address} = $real_address;
344
345	} elsif (/^(.+)<([^\s]+)>\s*<([^\s]+)>$/) {
346	    my $real_name = $1;
347	    my $real_address = $2;
348	    my $wrong_address = $3;
349
350	    $real_name =~ s/\s+$//;
351	    ($real_name, $real_address) =
352		parse_email("$real_name <$real_address>");
353	    $mailmap->{names}->{$wrong_address} = $real_name;
354	    $mailmap->{addresses}->{$wrong_address} = $real_address;
355
356	} elsif (/^(.+)<([^\s]+)>\s*([^\s].*)<([^\s]+)>$/) {
357	    my $real_name = $1;
358	    my $real_address = $2;
359	    my $wrong_name = $3;
360	    my $wrong_address = $4;
361
362	    $real_name =~ s/\s+$//;
363	    ($real_name, $real_address) =
364		parse_email("$real_name <$real_address>");
365
366	    $wrong_name =~ s/\s+$//;
367	    ($wrong_name, $wrong_address) =
368		parse_email("$wrong_name <$wrong_address>");
369
370	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
371	    $mailmap->{names}->{$wrong_email} = $real_name;
372	    $mailmap->{addresses}->{$wrong_email} = $real_address;
373	}
374    }
375    close($mailmap_file);
376}
377
378## use the filenames on the command line or find the filenames in the patchfiles
379
380my @files = ();
381my @range = ();
382my @keyword_tvi = ();
383my @file_emails = ();
384
385if (!@ARGV) {
386    push(@ARGV, "&STDIN");
387}
388
389foreach my $file (@ARGV) {
390    if ($file ne "&STDIN") {
391	##if $file is a directory and it lacks a trailing slash, add one
392	if ((-d $file)) {
393	    $file =~ s@([^/])$@$1/@;
394	} elsif (!(-f $file)) {
395	    die "$P: file '${file}' not found\n";
396	}
397    }
398    if ($from_filename) {
399	push(@files, $file);
400	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
401	    open(my $f, '<', $file)
402		or die "$P: Can't open $file: $!\n";
403	    my $text = do { local($/) ; <$f> };
404	    close($f);
405	    if ($keywords) {
406		foreach my $line (keys %keyword_hash) {
407		    if ($text =~ m/$keyword_hash{$line}/x) {
408			push(@keyword_tvi, $line);
409		    }
410		}
411	    }
412	    if ($file_emails) {
413		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
414		push(@file_emails, clean_file_emails(@poss_addr));
415	    }
416	}
417    } else {
418	my $file_cnt = @files;
419	my $lastfile;
420
421	open(my $patch, "< $file")
422	    or die "$P: Can't open $file: $!\n";
423
424	# We can check arbitrary information before the patch
425	# like the commit message, mail headers, etc...
426	# This allows us to match arbitrary keywords against any part
427	# of a git format-patch generated file (subject tags, etc...)
428
429	my $patch_prefix = "";			#Parsing the intro
430
431	while (<$patch>) {
432	    my $patch_line = $_;
433	    if (m/^\+\+\+\s+(\S+)/) {
434		my $filename = $1;
435		$filename =~ s@^[^/]*/@@;
436		$filename =~ s@\n@@;
437		$lastfile = $filename;
438		push(@files, $filename);
439		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
440	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
441		if ($email_git_blame) {
442		    push(@range, "$lastfile:$1:$2");
443		}
444	    } elsif ($keywords) {
445		foreach my $line (keys %keyword_hash) {
446		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
447			push(@keyword_tvi, $line);
448		    }
449		}
450	    }
451	}
452	close($patch);
453
454	if ($file_cnt == @files) {
455	    warn "$P: file '${file}' doesn't appear to be a patch.  "
456		. "Add -f to options?\n";
457	}
458	@files = sort_and_uniq(@files);
459    }
460}
461
462@file_emails = uniq(@file_emails);
463
464my %email_hash_name;
465my %email_hash_address;
466my @email_to = ();
467my %hash_list_to;
468my @list_to = ();
469my @scm = ();
470my @web = ();
471my @subsystem = ();
472my @status = ();
473my %deduplicate_name_hash = ();
474my %deduplicate_address_hash = ();
475my $signature_pattern;
476
477my @maintainers = get_maintainers();
478
479if (@maintainers) {
480    @maintainers = merge_email(@maintainers);
481    output(@maintainers);
482}
483
484if ($scm) {
485    @scm = uniq(@scm);
486    output(@scm);
487}
488
489if ($status) {
490    @status = uniq(@status);
491    output(@status);
492}
493
494if ($subsystem) {
495    @subsystem = uniq(@subsystem);
496    output(@subsystem);
497}
498
499if ($web) {
500    @web = uniq(@web);
501    output(@web);
502}
503
504exit($exit);
505
506sub range_is_maintained {
507    my ($start, $end) = @_;
508
509    for (my $i = $start; $i < $end; $i++) {
510	my $line = $typevalue[$i];
511	if ($line =~ m/^(\C):\s*(.*)/) {
512	    my $type = $1;
513	    my $value = $2;
514	    if ($type eq 'S') {
515		if ($value =~ /(maintain|support)/i) {
516		    return 1;
517		}
518	    }
519	}
520    }
521    return 0;
522}
523
524sub range_has_maintainer {
525    my ($start, $end) = @_;
526
527    for (my $i = $start; $i < $end; $i++) {
528	my $line = $typevalue[$i];
529	if ($line =~ m/^(\C):\s*(.*)/) {
530	    my $type = $1;
531	    my $value = $2;
532	    if ($type eq 'M') {
533		return 1;
534	    }
535	}
536    }
537    return 0;
538}
539
540sub get_maintainers {
541    %email_hash_name = ();
542    %email_hash_address = ();
543    %commit_author_hash = ();
544    %commit_signer_hash = ();
545    @email_to = ();
546    %hash_list_to = ();
547    @list_to = ();
548    @scm = ();
549    @web = ();
550    @subsystem = ();
551    @status = ();
552    %deduplicate_name_hash = ();
553    %deduplicate_address_hash = ();
554    if ($email_git_all_signature_types) {
555	$signature_pattern = "(.+?)[Bb][Yy]:";
556    } else {
557	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
558    }
559
560    # Find responsible parties
561
562    my %exact_pattern_match_hash = ();
563
564    foreach my $file (@files) {
565
566	my %hash;
567	my $tvi = find_first_section();
568	while ($tvi < @typevalue) {
569	    my $start = find_starting_index($tvi);
570	    my $end = find_ending_index($tvi);
571	    my $exclude = 0;
572	    my $i;
573
574	    #Do not match excluded file patterns
575
576	    for ($i = $start; $i < $end; $i++) {
577		my $line = $typevalue[$i];
578		if ($line =~ m/^(\C):\s*(.*)/) {
579		    my $type = $1;
580		    my $value = $2;
581		    if ($type eq 'X') {
582			if (file_match_pattern($file, $value)) {
583			    $exclude = 1;
584			    last;
585			}
586		    }
587		}
588	    }
589
590	    if (!$exclude) {
591		for ($i = $start; $i < $end; $i++) {
592		    my $line = $typevalue[$i];
593		    if ($line =~ m/^(\C):\s*(.*)/) {
594			my $type = $1;
595			my $value = $2;
596			if ($type eq 'F') {
597			    if (file_match_pattern($file, $value)) {
598				my $value_pd = ($value =~ tr@/@@);
599				my $file_pd = ($file  =~ tr@/@@);
600				$value_pd++ if (substr($value,-1,1) ne "/");
601				$value_pd = -1 if ($value =~ /^\.\*/);
602				if ($value_pd >= $file_pd &&
603				    range_is_maintained($start, $end) &&
604				    range_has_maintainer($start, $end)) {
605				    $exact_pattern_match_hash{$file} = 1;
606				}
607				if ($pattern_depth == 0 ||
608				    (($file_pd - $value_pd) < $pattern_depth)) {
609				    $hash{$tvi} = $value_pd;
610				}
611			    }
612			}
613		    }
614		}
615	    }
616	    $tvi = $end + 1;
617	}
618
619	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
620	    add_categories($line);
621	    if ($sections) {
622		my $i;
623		my $start = find_starting_index($line);
624		my $end = find_ending_index($line);
625		for ($i = $start; $i < $end; $i++) {
626		    my $line = $typevalue[$i];
627		    if ($line =~ /^[FX]:/) {		##Restore file patterns
628			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
629			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
630			$line =~ s/\\\./\./g;       	##Convert \. to .
631			$line =~ s/\.\*/\*/g;       	##Convert .* to *
632		    }
633		    $line =~ s/^([A-Z]):/$1:\t/g;
634		    print("$line\n");
635		}
636		print("\n");
637	    }
638	}
639    }
640
641    if ($keywords) {
642	@keyword_tvi = sort_and_uniq(@keyword_tvi);
643	foreach my $line (@keyword_tvi) {
644	    add_categories($line);
645	}
646    }
647
648    foreach my $email (@email_to, @list_to) {
649	$email->[0] = deduplicate_email($email->[0]);
650    }
651
652    foreach my $file (@files) {
653	if ($email &&
654	    ($email_git || ($email_git_fallback &&
655			    !$exact_pattern_match_hash{$file}))) {
656	    vcs_file_signoffs($file);
657	}
658	if ($email && $email_git_blame) {
659	    vcs_file_blame($file);
660	}
661    }
662
663    if ($email) {
664	foreach my $chief (@penguin_chief) {
665	    if ($chief =~ m/^(.*):(.*)/) {
666		my $email_address;
667
668		$email_address = format_email($1, $2, $email_usename);
669		if ($email_git_penguin_chiefs) {
670		    push(@email_to, [$email_address, 'chief penguin']);
671		} else {
672		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
673		}
674	    }
675	}
676
677	foreach my $email (@file_emails) {
678	    my ($name, $address) = parse_email($email);
679
680	    my $tmp_email = format_email($name, $address, $email_usename);
681	    push_email_address($tmp_email, '');
682	    add_role($tmp_email, 'in file');
683	}
684    }
685
686    my @to = ();
687    if ($email || $email_list) {
688	if ($email) {
689	    @to = (@to, @email_to);
690	}
691	if ($email_list) {
692	    @to = (@to, @list_to);
693	}
694    }
695
696    if ($interactive) {
697	@to = interactive_get_maintainers(\@to);
698    }
699
700    return @to;
701}
702
703sub file_match_pattern {
704    my ($file, $pattern) = @_;
705    if (substr($pattern, -1) eq "/") {
706	if ($file =~ m@^$pattern@) {
707	    return 1;
708	}
709    } else {
710	if ($file =~ m@^$pattern@) {
711	    my $s1 = ($file =~ tr@/@@);
712	    my $s2 = ($pattern =~ tr@/@@);
713	    if ($s1 == $s2) {
714		return 1;
715	    }
716	}
717    }
718    return 0;
719}
720
721sub usage {
722    print <<EOT;
723usage: $P [options] patchfile
724       $P [options] -f file|directory
725version: $V
726
727MAINTAINER field selection options:
728  --email => print email address(es) if any
729    --git => include recent git \*-by: signers
730    --git-all-signature-types => include signers regardless of signature type
731        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
732    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
733    --git-chief-penguins => include ${penguin_chiefs}
734    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
735    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
736    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
737    --git-blame => use git blame to find modified commits for patch or file
738    --git-since => git history to use (default: $email_git_since)
739    --hg-since => hg history to use (default: $email_hg_since)
740    --interactive => display a menu (mostly useful if used with the --git option)
741    --m => include maintainer(s) if any
742    --n => include name 'Full Name <addr\@domain.tld>'
743    --l => include list(s) if any
744    --s => include subscriber only list(s) if any
745    --remove-duplicates => minimize duplicate email names/addresses
746    --roles => show roles (status:subsystem, git-signer, list, etc...)
747    --rolestats => show roles and statistics (commits/total_commits, %)
748    --file-emails => add email addresses found in -f file (default: 0 (off))
749  --scm => print SCM tree(s) if any
750  --status => print status if any
751  --subsystem => print subsystem name if any
752  --web => print website(s) if any
753
754Output type options:
755  --separator [, ] => separator for multiple entries on 1 line
756    using --separator also sets --nomultiline if --separator is not [, ]
757  --multiline => print 1 entry per line
758
759Other options:
760  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
761  --keywords => scan patch for keywords (default: $keywords)
762  --sections => print all of the subsystem sections with pattern matches
763  --mailmap => use .mailmap file (default: $email_use_mailmap)
764  --version => show version
765  --help => show this help information
766
767Default options:
768  [--email --nogit --git-fallback --m --n --l --multiline -pattern-depth=0
769   --remove-duplicates --rolestats]
770
771Notes:
772  Using "-f directory" may give unexpected results:
773      Used with "--git", git signators for _all_ files in and below
774          directory are examined as git recurses directories.
775          Any specified X: (exclude) pattern matches are _not_ ignored.
776      Used with "--nogit", directory is used as a pattern match,
777          no individual file within the directory or subdirectory
778          is matched.
779      Used with "--git-blame", does not iterate all files in directory
780  Using "--git-blame" is slow and may add old committers and authors
781      that are no longer active maintainers to the output.
782  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
783      other automated tools that expect only ["name"] <email address>
784      may not work because of additional output after <email address>.
785  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
786      not the percentage of the entire file authored.  # of commits is
787      not a good measure of amount of code authored.  1 major commit may
788      contain a thousand lines, 5 trivial commits may modify a single line.
789  If git is not installed, but mercurial (hg) is installed and an .hg
790      repository exists, the following options apply to mercurial:
791          --git,
792          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
793          --git-blame
794      Use --hg-since not --git-since to control date selection
795  File ".get_maintainer.conf", if it exists in the QEMU source root
796      directory, can change whatever get_maintainer defaults are desired.
797      Entries in this file can be any command line argument.
798      This file is prepended to any additional command line arguments.
799      Multiple lines and # comments are allowed.
800EOT
801}
802
803sub top_of_tree {
804    my ($lk_path) = @_;
805
806    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
807	$lk_path .= "/";
808    }
809    if (    (-f "${lk_path}COPYING")
810        && (-f "${lk_path}MAINTAINERS")
811        && (-f "${lk_path}Makefile")
812        && (-d "${lk_path}docs")
813        && (-f "${lk_path}VERSION")
814        && (-f "${lk_path}vl.c")) {
815	return 1;
816    }
817    return 0;
818}
819
820sub parse_email {
821    my ($formatted_email) = @_;
822
823    my $name = "";
824    my $address = "";
825
826    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
827	$name = $1;
828	$address = $2;
829    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
830	$address = $1;
831    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
832	$address = $1;
833    }
834
835    $name =~ s/^\s+|\s+$//g;
836    $name =~ s/^\"|\"$//g;
837    $address =~ s/^\s+|\s+$//g;
838
839    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
840	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
841	$name = "\"$name\"";
842    }
843
844    return ($name, $address);
845}
846
847sub format_email {
848    my ($name, $address, $usename) = @_;
849
850    my $formatted_email;
851
852    $name =~ s/^\s+|\s+$//g;
853    $name =~ s/^\"|\"$//g;
854    $address =~ s/^\s+|\s+$//g;
855
856    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
857	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
858	$name = "\"$name\"";
859    }
860
861    if ($usename) {
862	if ("$name" eq "") {
863	    $formatted_email = "$address";
864	} else {
865	    $formatted_email = "$name <$address>";
866	}
867    } else {
868	$formatted_email = $address;
869    }
870
871    return $formatted_email;
872}
873
874sub find_first_section {
875    my $index = 0;
876
877    while ($index < @typevalue) {
878	my $tv = $typevalue[$index];
879	if (($tv =~ m/^(\C):\s*(.*)/)) {
880	    last;
881	}
882	$index++;
883    }
884
885    return $index;
886}
887
888sub find_starting_index {
889    my ($index) = @_;
890
891    while ($index > 0) {
892	my $tv = $typevalue[$index];
893	if (!($tv =~ m/^(\C):\s*(.*)/)) {
894	    last;
895	}
896	$index--;
897    }
898
899    return $index;
900}
901
902sub find_ending_index {
903    my ($index) = @_;
904
905    while ($index < @typevalue) {
906	my $tv = $typevalue[$index];
907	if (!($tv =~ m/^(\C):\s*(.*)/)) {
908	    last;
909	}
910	$index++;
911    }
912
913    return $index;
914}
915
916sub get_maintainer_role {
917    my ($index) = @_;
918
919    my $i;
920    my $start = find_starting_index($index);
921    my $end = find_ending_index($index);
922
923    my $role;
924    my $subsystem = $typevalue[$start];
925    if (length($subsystem) > 20) {
926	$subsystem = substr($subsystem, 0, 17);
927	$subsystem =~ s/\s*$//;
928	$subsystem = $subsystem . "...";
929    }
930
931    for ($i = $start + 1; $i < $end; $i++) {
932	my $tv = $typevalue[$i];
933	if ($tv =~ m/^(\C):\s*(.*)/) {
934	    my $ptype = $1;
935	    my $pvalue = $2;
936	    if ($ptype eq "S") {
937		$role = $pvalue;
938	    }
939	}
940    }
941
942    $role = lc($role);
943    if      ($role eq "supported") {
944	$role = "supporter";
945    } elsif ($role eq "maintained") {
946	$role = "maintainer";
947    } elsif ($role eq "odd fixes") {
948	$role = "odd fixer";
949    } elsif ($role eq "orphan") {
950	$role = "orphan minder";
951    } elsif ($role eq "obsolete") {
952	$role = "obsolete minder";
953    } elsif ($role eq "buried alive in reporters") {
954	$role = "chief penguin";
955    }
956
957    return $role . ":" . $subsystem;
958}
959
960sub get_list_role {
961    my ($index) = @_;
962
963    my $i;
964    my $start = find_starting_index($index);
965    my $end = find_ending_index($index);
966
967    my $subsystem = $typevalue[$start];
968    if (length($subsystem) > 20) {
969	$subsystem = substr($subsystem, 0, 17);
970	$subsystem =~ s/\s*$//;
971	$subsystem = $subsystem . "...";
972    }
973
974    if ($subsystem eq "THE REST") {
975	$subsystem = "";
976    }
977
978    return $subsystem;
979}
980
981sub add_categories {
982    my ($index) = @_;
983
984    my $i;
985    my $start = find_starting_index($index);
986    my $end = find_ending_index($index);
987
988    push(@subsystem, $typevalue[$start]);
989
990    for ($i = $start + 1; $i < $end; $i++) {
991	my $tv = $typevalue[$i];
992	if ($tv =~ m/^(\C):\s*(.*)/) {
993	    my $ptype = $1;
994	    my $pvalue = $2;
995	    if ($ptype eq "L") {
996		my $list_address = $pvalue;
997		my $list_additional = "";
998		my $list_role = get_list_role($i);
999
1000		if ($list_role ne "") {
1001		    $list_role = ":" . $list_role;
1002		}
1003		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1004		    $list_address = $1;
1005		    $list_additional = $2;
1006		}
1007		if ($list_additional =~ m/subscribers-only/) {
1008		    if ($email_subscriber_list) {
1009			if (!$hash_list_to{lc($list_address)}) {
1010			    $hash_list_to{lc($list_address)} = 1;
1011			    push(@list_to, [$list_address,
1012					    "subscriber list${list_role}"]);
1013			}
1014		    }
1015		} else {
1016		    if ($email_list) {
1017			if (!$hash_list_to{lc($list_address)}) {
1018			    $hash_list_to{lc($list_address)} = 1;
1019			    push(@list_to, [$list_address,
1020					    "open list${list_role}"]);
1021			}
1022		    }
1023		}
1024	    } elsif ($ptype eq "M") {
1025		my ($name, $address) = parse_email($pvalue);
1026		if ($name eq "") {
1027		    if ($i > 0) {
1028			my $tv = $typevalue[$i - 1];
1029			if ($tv =~ m/^(\C):\s*(.*)/) {
1030			    if ($1 eq "P") {
1031				$name = $2;
1032				$pvalue = format_email($name, $address, $email_usename);
1033			    }
1034			}
1035		    }
1036		}
1037		if ($email_maintainer) {
1038		    my $role = get_maintainer_role($i);
1039		    push_email_addresses($pvalue, $role);
1040		}
1041	    } elsif ($ptype eq "T") {
1042		push(@scm, $pvalue);
1043	    } elsif ($ptype eq "W") {
1044		push(@web, $pvalue);
1045	    } elsif ($ptype eq "S") {
1046		push(@status, $pvalue);
1047	    }
1048	}
1049    }
1050}
1051
1052sub email_inuse {
1053    my ($name, $address) = @_;
1054
1055    return 1 if (($name eq "") && ($address eq ""));
1056    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1057    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1058
1059    return 0;
1060}
1061
1062sub push_email_address {
1063    my ($line, $role) = @_;
1064
1065    my ($name, $address) = parse_email($line);
1066
1067    if ($address eq "") {
1068	return 0;
1069    }
1070
1071    if (!$email_remove_duplicates) {
1072	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1073    } elsif (!email_inuse($name, $address)) {
1074	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1075	$email_hash_name{lc($name)}++ if ($name ne "");
1076	$email_hash_address{lc($address)}++;
1077    }
1078
1079    return 1;
1080}
1081
1082sub push_email_addresses {
1083    my ($address, $role) = @_;
1084
1085    my @address_list = ();
1086
1087    if (rfc822_valid($address)) {
1088	push_email_address($address, $role);
1089    } elsif (@address_list = rfc822_validlist($address)) {
1090	my $array_count = shift(@address_list);
1091	while (my $entry = shift(@address_list)) {
1092	    push_email_address($entry, $role);
1093	}
1094    } else {
1095	if (!push_email_address($address, $role)) {
1096	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1097	}
1098    }
1099}
1100
1101sub add_role {
1102    my ($line, $role) = @_;
1103
1104    my ($name, $address) = parse_email($line);
1105    my $email = format_email($name, $address, $email_usename);
1106
1107    foreach my $entry (@email_to) {
1108	if ($email_remove_duplicates) {
1109	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1110	    if (($name eq $entry_name || $address eq $entry_address)
1111		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1112	    ) {
1113		if ($entry->[1] eq "") {
1114		    $entry->[1] = "$role";
1115		} else {
1116		    $entry->[1] = "$entry->[1],$role";
1117		}
1118	    }
1119	} else {
1120	    if ($email eq $entry->[0]
1121		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1122	    ) {
1123		if ($entry->[1] eq "") {
1124		    $entry->[1] = "$role";
1125		} else {
1126		    $entry->[1] = "$entry->[1],$role";
1127		}
1128	    }
1129	}
1130    }
1131}
1132
1133sub which {
1134    my ($bin) = @_;
1135
1136    foreach my $path (split(/:/, $ENV{PATH})) {
1137	if (-e "$path/$bin") {
1138	    return "$path/$bin";
1139	}
1140    }
1141
1142    return "";
1143}
1144
1145sub which_conf {
1146    my ($conf) = @_;
1147
1148    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1149	if (-e "$path/$conf") {
1150	    return "$path/$conf";
1151	}
1152    }
1153
1154    return "";
1155}
1156
1157sub mailmap_email {
1158    my ($line) = @_;
1159
1160    my ($name, $address) = parse_email($line);
1161    my $email = format_email($name, $address, 1);
1162    my $real_name = $name;
1163    my $real_address = $address;
1164
1165    if (exists $mailmap->{names}->{$email} ||
1166	exists $mailmap->{addresses}->{$email}) {
1167	if (exists $mailmap->{names}->{$email}) {
1168	    $real_name = $mailmap->{names}->{$email};
1169	}
1170	if (exists $mailmap->{addresses}->{$email}) {
1171	    $real_address = $mailmap->{addresses}->{$email};
1172	}
1173    } else {
1174	if (exists $mailmap->{names}->{$address}) {
1175	    $real_name = $mailmap->{names}->{$address};
1176	}
1177	if (exists $mailmap->{addresses}->{$address}) {
1178	    $real_address = $mailmap->{addresses}->{$address};
1179	}
1180    }
1181    return format_email($real_name, $real_address, 1);
1182}
1183
1184sub mailmap {
1185    my (@addresses) = @_;
1186
1187    my @mapped_emails = ();
1188    foreach my $line (@addresses) {
1189	push(@mapped_emails, mailmap_email($line));
1190    }
1191    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1192    return @mapped_emails;
1193}
1194
1195sub merge_by_realname {
1196    my %address_map;
1197    my (@emails) = @_;
1198
1199    foreach my $email (@emails) {
1200	my ($name, $address) = parse_email($email);
1201	if (exists $address_map{$name}) {
1202	    $address = $address_map{$name};
1203	    $email = format_email($name, $address, 1);
1204	} else {
1205	    $address_map{$name} = $address;
1206	}
1207    }
1208}
1209
1210sub git_execute_cmd {
1211    my ($cmd) = @_;
1212    my @lines = ();
1213
1214    my $output = `$cmd`;
1215    $output =~ s/^\s*//gm;
1216    @lines = split("\n", $output);
1217
1218    return @lines;
1219}
1220
1221sub hg_execute_cmd {
1222    my ($cmd) = @_;
1223    my @lines = ();
1224
1225    my $output = `$cmd`;
1226    @lines = split("\n", $output);
1227
1228    return @lines;
1229}
1230
1231sub extract_formatted_signatures {
1232    my (@signature_lines) = @_;
1233
1234    my @type = @signature_lines;
1235
1236    s/\s*(.*):.*/$1/ for (@type);
1237
1238    # cut -f2- -d":"
1239    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1240
1241## Reformat email addresses (with names) to avoid badly written signatures
1242
1243    foreach my $signer (@signature_lines) {
1244	$signer = deduplicate_email($signer);
1245    }
1246
1247    return (\@type, \@signature_lines);
1248}
1249
1250sub vcs_find_signers {
1251    my ($cmd) = @_;
1252    my $commits;
1253    my @lines = ();
1254    my @signatures = ();
1255
1256    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1257
1258    my $pattern = $VCS_cmds{"commit_pattern"};
1259
1260    $commits = grep(/$pattern/, @lines);	# of commits
1261
1262    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1263
1264    return (0, @signatures) if !@signatures;
1265
1266    save_commits_by_author(@lines) if ($interactive);
1267    save_commits_by_signer(@lines) if ($interactive);
1268
1269    if (!$email_git_penguin_chiefs) {
1270	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1271    }
1272
1273    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1274
1275    return ($commits, @$signers_ref);
1276}
1277
1278sub vcs_find_author {
1279    my ($cmd) = @_;
1280    my @lines = ();
1281
1282    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1283
1284    if (!$email_git_penguin_chiefs) {
1285	@lines = grep(!/${penguin_chiefs}/i, @lines);
1286    }
1287
1288    return @lines if !@lines;
1289
1290    my @authors = ();
1291    foreach my $line (@lines) {
1292	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1293	    my $author = $1;
1294	    my ($name, $address) = parse_email($author);
1295	    $author = format_email($name, $address, 1);
1296	    push(@authors, $author);
1297	}
1298    }
1299
1300    save_commits_by_author(@lines) if ($interactive);
1301    save_commits_by_signer(@lines) if ($interactive);
1302
1303    return @authors;
1304}
1305
1306sub vcs_save_commits {
1307    my ($cmd) = @_;
1308    my @lines = ();
1309    my @commits = ();
1310
1311    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1312
1313    foreach my $line (@lines) {
1314	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1315	    push(@commits, $1);
1316	}
1317    }
1318
1319    return @commits;
1320}
1321
1322sub vcs_blame {
1323    my ($file) = @_;
1324    my $cmd;
1325    my @commits = ();
1326
1327    return @commits if (!(-f $file));
1328
1329    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1330	my @all_commits = ();
1331
1332	$cmd = $VCS_cmds{"blame_file_cmd"};
1333	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1334	@all_commits = vcs_save_commits($cmd);
1335
1336	foreach my $file_range_diff (@range) {
1337	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1338	    my $diff_file = $1;
1339	    my $diff_start = $2;
1340	    my $diff_length = $3;
1341	    next if ("$file" ne "$diff_file");
1342	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1343		push(@commits, $all_commits[$i]);
1344	    }
1345	}
1346    } elsif (@range) {
1347	foreach my $file_range_diff (@range) {
1348	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1349	    my $diff_file = $1;
1350	    my $diff_start = $2;
1351	    my $diff_length = $3;
1352	    next if ("$file" ne "$diff_file");
1353	    $cmd = $VCS_cmds{"blame_range_cmd"};
1354	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1355	    push(@commits, vcs_save_commits($cmd));
1356	}
1357    } else {
1358	$cmd = $VCS_cmds{"blame_file_cmd"};
1359	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1360	@commits = vcs_save_commits($cmd);
1361    }
1362
1363    foreach my $commit (@commits) {
1364	$commit =~ s/^\^//g;
1365    }
1366
1367    return @commits;
1368}
1369
1370my $printed_novcs = 0;
1371sub vcs_exists {
1372    %VCS_cmds = %VCS_cmds_git;
1373    return 1 if eval $VCS_cmds{"available"};
1374    %VCS_cmds = %VCS_cmds_hg;
1375    return 2 if eval $VCS_cmds{"available"};
1376    %VCS_cmds = ();
1377    if (!$printed_novcs) {
1378	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1379	warn("Using a git repository produces better results.\n");
1380	warn("Try latest git repository using:\n");
1381	warn("git clone git://git.qemu.org/qemu.git\n");
1382	$printed_novcs = 1;
1383    }
1384    return 0;
1385}
1386
1387sub vcs_is_git {
1388    vcs_exists();
1389    return $vcs_used == 1;
1390}
1391
1392sub vcs_is_hg {
1393    return $vcs_used == 2;
1394}
1395
1396sub interactive_get_maintainers {
1397    my ($list_ref) = @_;
1398    my @list = @$list_ref;
1399
1400    vcs_exists();
1401
1402    my %selected;
1403    my %authored;
1404    my %signed;
1405    my $count = 0;
1406    my $maintained = 0;
1407    foreach my $entry (@list) {
1408	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1409	$selected{$count} = 1;
1410	$authored{$count} = 0;
1411	$signed{$count} = 0;
1412	$count++;
1413    }
1414
1415    #menu loop
1416    my $done = 0;
1417    my $print_options = 0;
1418    my $redraw = 1;
1419    while (!$done) {
1420	$count = 0;
1421	if ($redraw) {
1422	    printf STDERR "\n%1s %2s %-65s",
1423			  "*", "#", "email/list and role:stats";
1424	    if ($email_git ||
1425		($email_git_fallback && !$maintained) ||
1426		$email_git_blame) {
1427		print STDERR "auth sign";
1428	    }
1429	    print STDERR "\n";
1430	    foreach my $entry (@list) {
1431		my $email = $entry->[0];
1432		my $role = $entry->[1];
1433		my $sel = "";
1434		$sel = "*" if ($selected{$count});
1435		my $commit_author = $commit_author_hash{$email};
1436		my $commit_signer = $commit_signer_hash{$email};
1437		my $authored = 0;
1438		my $signed = 0;
1439		$authored++ for (@{$commit_author});
1440		$signed++ for (@{$commit_signer});
1441		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1442		printf STDERR "%4d %4d", $authored, $signed
1443		    if ($authored > 0 || $signed > 0);
1444		printf STDERR "\n     %s\n", $role;
1445		if ($authored{$count}) {
1446		    my $commit_author = $commit_author_hash{$email};
1447		    foreach my $ref (@{$commit_author}) {
1448			print STDERR "     Author: @{$ref}[1]\n";
1449		    }
1450		}
1451		if ($signed{$count}) {
1452		    my $commit_signer = $commit_signer_hash{$email};
1453		    foreach my $ref (@{$commit_signer}) {
1454			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1455		    }
1456		}
1457
1458		$count++;
1459	    }
1460	}
1461	my $date_ref = \$email_git_since;
1462	$date_ref = \$email_hg_since if (vcs_is_hg());
1463	if ($print_options) {
1464	    $print_options = 0;
1465	    if (vcs_exists()) {
1466		print STDERR <<EOT
1467
1468Version Control options:
1469g  use git history      [$email_git]
1470gf use git-fallback     [$email_git_fallback]
1471b  use git blame        [$email_git_blame]
1472bs use blame signatures [$email_git_blame_signatures]
1473c# minimum commits      [$email_git_min_signatures]
1474%# min percent          [$email_git_min_percent]
1475d# history to use       [$$date_ref]
1476x# max maintainers      [$email_git_max_maintainers]
1477t  all signature types  [$email_git_all_signature_types]
1478m  use .mailmap         [$email_use_mailmap]
1479EOT
1480	    }
1481	    print STDERR <<EOT
1482
1483Additional options:
14840  toggle all
1485tm toggle maintainers
1486tg toggle git entries
1487tl toggle open list entries
1488ts toggle subscriber list entries
1489f  emails in file       [$file_emails]
1490k  keywords in file     [$keywords]
1491r  remove duplicates    [$email_remove_duplicates]
1492p# pattern match depth  [$pattern_depth]
1493EOT
1494	}
1495	print STDERR
1496"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1497
1498	my $input = <STDIN>;
1499	chomp($input);
1500
1501	$redraw = 1;
1502	my $rerun = 0;
1503	my @wish = split(/[, ]+/, $input);
1504	foreach my $nr (@wish) {
1505	    $nr = lc($nr);
1506	    my $sel = substr($nr, 0, 1);
1507	    my $str = substr($nr, 1);
1508	    my $val = 0;
1509	    $val = $1 if $str =~ /^(\d+)$/;
1510
1511	    if ($sel eq "y") {
1512		$interactive = 0;
1513		$done = 1;
1514		$output_rolestats = 0;
1515		$output_roles = 0;
1516		last;
1517	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1518		$selected{$nr - 1} = !$selected{$nr - 1};
1519	    } elsif ($sel eq "*" || $sel eq '^') {
1520		my $toggle = 0;
1521		$toggle = 1 if ($sel eq '*');
1522		for (my $i = 0; $i < $count; $i++) {
1523		    $selected{$i} = $toggle;
1524		}
1525	    } elsif ($sel eq "0") {
1526		for (my $i = 0; $i < $count; $i++) {
1527		    $selected{$i} = !$selected{$i};
1528		}
1529	    } elsif ($sel eq "t") {
1530		if (lc($str) eq "m") {
1531		    for (my $i = 0; $i < $count; $i++) {
1532			$selected{$i} = !$selected{$i}
1533			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1534		    }
1535		} elsif (lc($str) eq "g") {
1536		    for (my $i = 0; $i < $count; $i++) {
1537			$selected{$i} = !$selected{$i}
1538			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1539		    }
1540		} elsif (lc($str) eq "l") {
1541		    for (my $i = 0; $i < $count; $i++) {
1542			$selected{$i} = !$selected{$i}
1543			    if ($list[$i]->[1] =~ /^(open list)/i);
1544		    }
1545		} elsif (lc($str) eq "s") {
1546		    for (my $i = 0; $i < $count; $i++) {
1547			$selected{$i} = !$selected{$i}
1548			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1549		    }
1550		}
1551	    } elsif ($sel eq "a") {
1552		if ($val > 0 && $val <= $count) {
1553		    $authored{$val - 1} = !$authored{$val - 1};
1554		} elsif ($str eq '*' || $str eq '^') {
1555		    my $toggle = 0;
1556		    $toggle = 1 if ($str eq '*');
1557		    for (my $i = 0; $i < $count; $i++) {
1558			$authored{$i} = $toggle;
1559		    }
1560		}
1561	    } elsif ($sel eq "s") {
1562		if ($val > 0 && $val <= $count) {
1563		    $signed{$val - 1} = !$signed{$val - 1};
1564		} elsif ($str eq '*' || $str eq '^') {
1565		    my $toggle = 0;
1566		    $toggle = 1 if ($str eq '*');
1567		    for (my $i = 0; $i < $count; $i++) {
1568			$signed{$i} = $toggle;
1569		    }
1570		}
1571	    } elsif ($sel eq "o") {
1572		$print_options = 1;
1573		$redraw = 1;
1574	    } elsif ($sel eq "g") {
1575		if ($str eq "f") {
1576		    bool_invert(\$email_git_fallback);
1577		} else {
1578		    bool_invert(\$email_git);
1579		}
1580		$rerun = 1;
1581	    } elsif ($sel eq "b") {
1582		if ($str eq "s") {
1583		    bool_invert(\$email_git_blame_signatures);
1584		} else {
1585		    bool_invert(\$email_git_blame);
1586		}
1587		$rerun = 1;
1588	    } elsif ($sel eq "c") {
1589		if ($val > 0) {
1590		    $email_git_min_signatures = $val;
1591		    $rerun = 1;
1592		}
1593	    } elsif ($sel eq "x") {
1594		if ($val > 0) {
1595		    $email_git_max_maintainers = $val;
1596		    $rerun = 1;
1597		}
1598	    } elsif ($sel eq "%") {
1599		if ($str ne "" && $val >= 0) {
1600		    $email_git_min_percent = $val;
1601		    $rerun = 1;
1602		}
1603	    } elsif ($sel eq "d") {
1604		if (vcs_is_git()) {
1605		    $email_git_since = $str;
1606		} elsif (vcs_is_hg()) {
1607		    $email_hg_since = $str;
1608		}
1609		$rerun = 1;
1610	    } elsif ($sel eq "t") {
1611		bool_invert(\$email_git_all_signature_types);
1612		$rerun = 1;
1613	    } elsif ($sel eq "f") {
1614		bool_invert(\$file_emails);
1615		$rerun = 1;
1616	    } elsif ($sel eq "r") {
1617		bool_invert(\$email_remove_duplicates);
1618		$rerun = 1;
1619	    } elsif ($sel eq "m") {
1620		bool_invert(\$email_use_mailmap);
1621		read_mailmap();
1622		$rerun = 1;
1623	    } elsif ($sel eq "k") {
1624		bool_invert(\$keywords);
1625		$rerun = 1;
1626	    } elsif ($sel eq "p") {
1627		if ($str ne "" && $val >= 0) {
1628		    $pattern_depth = $val;
1629		    $rerun = 1;
1630		}
1631	    } elsif ($sel eq "h" || $sel eq "?") {
1632		print STDERR <<EOT
1633
1634Interactive mode allows you to select the various maintainers, submitters,
1635commit signers and mailing lists that could be CC'd on a patch.
1636
1637Any *'d entry is selected.
1638
1639If you have git or hg installed, you can choose to summarize the commit
1640history of files in the patch.  Also, each line of the current file can
1641be matched to its commit author and that commits signers with blame.
1642
1643Various knobs exist to control the length of time for active commit
1644tracking, the maximum number of commit authors and signers to add,
1645and such.
1646
1647Enter selections at the prompt until you are satisfied that the selected
1648maintainers are appropriate.  You may enter multiple selections separated
1649by either commas or spaces.
1650
1651EOT
1652	    } else {
1653		print STDERR "invalid option: '$nr'\n";
1654		$redraw = 0;
1655	    }
1656	}
1657	if ($rerun) {
1658	    print STDERR "git-blame can be very slow, please have patience..."
1659		if ($email_git_blame);
1660	    goto &get_maintainers;
1661	}
1662    }
1663
1664    #drop not selected entries
1665    $count = 0;
1666    my @new_emailto = ();
1667    foreach my $entry (@list) {
1668	if ($selected{$count}) {
1669	    push(@new_emailto, $list[$count]);
1670	}
1671	$count++;
1672    }
1673    return @new_emailto;
1674}
1675
1676sub bool_invert {
1677    my ($bool_ref) = @_;
1678
1679    if ($$bool_ref) {
1680	$$bool_ref = 0;
1681    } else {
1682	$$bool_ref = 1;
1683    }
1684}
1685
1686sub deduplicate_email {
1687    my ($email) = @_;
1688
1689    my $matched = 0;
1690    my ($name, $address) = parse_email($email);
1691    $email = format_email($name, $address, 1);
1692    $email = mailmap_email($email);
1693
1694    return $email if (!$email_remove_duplicates);
1695
1696    ($name, $address) = parse_email($email);
1697
1698    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
1699	$name = $deduplicate_name_hash{lc($name)}->[0];
1700	$address = $deduplicate_name_hash{lc($name)}->[1];
1701	$matched = 1;
1702    } elsif ($deduplicate_address_hash{lc($address)}) {
1703	$name = $deduplicate_address_hash{lc($address)}->[0];
1704	$address = $deduplicate_address_hash{lc($address)}->[1];
1705	$matched = 1;
1706    }
1707    if (!$matched) {
1708	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
1709	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
1710    }
1711    $email = format_email($name, $address, 1);
1712    $email = mailmap_email($email);
1713    return $email;
1714}
1715
1716sub save_commits_by_author {
1717    my (@lines) = @_;
1718
1719    my @authors = ();
1720    my @commits = ();
1721    my @subjects = ();
1722
1723    foreach my $line (@lines) {
1724	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1725	    my $author = $1;
1726	    $author = deduplicate_email($author);
1727	    push(@authors, $author);
1728	}
1729	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1730	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1731    }
1732
1733    for (my $i = 0; $i < @authors; $i++) {
1734	my $exists = 0;
1735	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
1736	    if (@{$ref}[0] eq $commits[$i] &&
1737		@{$ref}[1] eq $subjects[$i]) {
1738		$exists = 1;
1739		last;
1740	    }
1741	}
1742	if (!$exists) {
1743	    push(@{$commit_author_hash{$authors[$i]}},
1744		 [ ($commits[$i], $subjects[$i]) ]);
1745	}
1746    }
1747}
1748
1749sub save_commits_by_signer {
1750    my (@lines) = @_;
1751
1752    my $commit = "";
1753    my $subject = "";
1754
1755    foreach my $line (@lines) {
1756	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1757	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1758	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
1759	    my @signatures = ($line);
1760	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1761	    my @types = @$types_ref;
1762	    my @signers = @$signers_ref;
1763
1764	    my $type = $types[0];
1765	    my $signer = $signers[0];
1766
1767	    $signer = deduplicate_email($signer);
1768
1769	    my $exists = 0;
1770	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
1771		if (@{$ref}[0] eq $commit &&
1772		    @{$ref}[1] eq $subject &&
1773		    @{$ref}[2] eq $type) {
1774		    $exists = 1;
1775		    last;
1776		}
1777	    }
1778	    if (!$exists) {
1779		push(@{$commit_signer_hash{$signer}},
1780		     [ ($commit, $subject, $type) ]);
1781	    }
1782	}
1783    }
1784}
1785
1786sub vcs_assign {
1787    my ($role, $divisor, @lines) = @_;
1788
1789    my %hash;
1790    my $count = 0;
1791
1792    return if (@lines <= 0);
1793
1794    if ($divisor <= 0) {
1795	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
1796	$divisor = 1;
1797    }
1798
1799    @lines = mailmap(@lines);
1800
1801    return if (@lines <= 0);
1802
1803    @lines = sort(@lines);
1804
1805    # uniq -c
1806    $hash{$_}++ for @lines;
1807
1808    # sort -rn
1809    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
1810	my $sign_offs = $hash{$line};
1811	my $percent = $sign_offs * 100 / $divisor;
1812
1813	$percent = 100 if ($percent > 100);
1814	$count++;
1815	last if ($sign_offs < $email_git_min_signatures ||
1816		 $count > $email_git_max_maintainers ||
1817		 $percent < $email_git_min_percent);
1818	push_email_address($line, '');
1819	if ($output_rolestats) {
1820	    my $fmt_percent = sprintf("%.0f", $percent);
1821	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
1822	} else {
1823	    add_role($line, $role);
1824	}
1825    }
1826}
1827
1828sub vcs_file_signoffs {
1829    my ($file) = @_;
1830
1831    my @signers = ();
1832    my $commits;
1833
1834    $vcs_used = vcs_exists();
1835    return if (!$vcs_used);
1836
1837    my $cmd = $VCS_cmds{"find_signers_cmd"};
1838    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
1839
1840    ($commits, @signers) = vcs_find_signers($cmd);
1841
1842    foreach my $signer (@signers) {
1843	$signer = deduplicate_email($signer);
1844    }
1845
1846    vcs_assign("commit_signer", $commits, @signers);
1847}
1848
1849sub vcs_file_blame {
1850    my ($file) = @_;
1851
1852    my @signers = ();
1853    my @all_commits = ();
1854    my @commits = ();
1855    my $total_commits;
1856    my $total_lines;
1857
1858    $vcs_used = vcs_exists();
1859    return if (!$vcs_used);
1860
1861    @all_commits = vcs_blame($file);
1862    @commits = uniq(@all_commits);
1863    $total_commits = @commits;
1864    $total_lines = @all_commits;
1865
1866    if ($email_git_blame_signatures) {
1867	if (vcs_is_hg()) {
1868	    my $commit_count;
1869	    my @commit_signers = ();
1870	    my $commit = join(" -r ", @commits);
1871	    my $cmd;
1872
1873	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1874	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1875
1876	    ($commit_count, @commit_signers) = vcs_find_signers($cmd);
1877
1878	    push(@signers, @commit_signers);
1879	} else {
1880	    foreach my $commit (@commits) {
1881		my $commit_count;
1882		my @commit_signers = ();
1883		my $cmd;
1884
1885		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
1886		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1887
1888		($commit_count, @commit_signers) = vcs_find_signers($cmd);
1889
1890		push(@signers, @commit_signers);
1891	    }
1892	}
1893    }
1894
1895    if ($from_filename) {
1896	if ($output_rolestats) {
1897	    my @blame_signers;
1898	    if (vcs_is_hg()) {{		# Double brace for last exit
1899		my $commit_count;
1900		my @commit_signers = ();
1901		@commits = uniq(@commits);
1902		@commits = sort(@commits);
1903		my $commit = join(" -r ", @commits);
1904		my $cmd;
1905
1906		$cmd = $VCS_cmds{"find_commit_author_cmd"};
1907		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1908
1909		my @lines = ();
1910
1911		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1912
1913		if (!$email_git_penguin_chiefs) {
1914		    @lines = grep(!/${penguin_chiefs}/i, @lines);
1915		}
1916
1917		last if !@lines;
1918
1919		my @authors = ();
1920		foreach my $line (@lines) {
1921		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1922			my $author = $1;
1923			$author = deduplicate_email($author);
1924			push(@authors, $author);
1925		    }
1926		}
1927
1928		save_commits_by_author(@lines) if ($interactive);
1929		save_commits_by_signer(@lines) if ($interactive);
1930
1931		push(@signers, @authors);
1932	    }}
1933	    else {
1934		foreach my $commit (@commits) {
1935		    my $i;
1936		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
1937		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
1938		    my @author = vcs_find_author($cmd);
1939		    next if !@author;
1940
1941		    my $formatted_author = deduplicate_email($author[0]);
1942
1943		    my $count = grep(/$commit/, @all_commits);
1944		    for ($i = 0; $i < $count ; $i++) {
1945			push(@blame_signers, $formatted_author);
1946		    }
1947		}
1948	    }
1949	    if (@blame_signers) {
1950		vcs_assign("authored lines", $total_lines, @blame_signers);
1951	    }
1952	}
1953	foreach my $signer (@signers) {
1954	    $signer = deduplicate_email($signer);
1955	}
1956	vcs_assign("commits", $total_commits, @signers);
1957    } else {
1958	foreach my $signer (@signers) {
1959	    $signer = deduplicate_email($signer);
1960	}
1961	vcs_assign("modified commits", $total_commits, @signers);
1962    }
1963}
1964
1965sub uniq {
1966    my (@parms) = @_;
1967
1968    my %saw;
1969    @parms = grep(!$saw{$_}++, @parms);
1970    return @parms;
1971}
1972
1973sub sort_and_uniq {
1974    my (@parms) = @_;
1975
1976    my %saw;
1977    @parms = sort @parms;
1978    @parms = grep(!$saw{$_}++, @parms);
1979    return @parms;
1980}
1981
1982sub clean_file_emails {
1983    my (@file_emails) = @_;
1984    my @fmt_emails = ();
1985
1986    foreach my $email (@file_emails) {
1987	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
1988	my ($name, $address) = parse_email($email);
1989	if ($name eq '"[,\.]"') {
1990	    $name = "";
1991	}
1992
1993	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
1994	if (@nw > 2) {
1995	    my $first = $nw[@nw - 3];
1996	    my $middle = $nw[@nw - 2];
1997	    my $last = $nw[@nw - 1];
1998
1999	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2000		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2001		(length($middle) == 1 ||
2002		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2003		$name = "$first $middle $last";
2004	    } else {
2005		$name = "$middle $last";
2006	    }
2007	}
2008
2009	if (substr($name, -1) =~ /[,\.]/) {
2010	    $name = substr($name, 0, length($name) - 1);
2011	} elsif (substr($name, -2) =~ /[,\.]"/) {
2012	    $name = substr($name, 0, length($name) - 2) . '"';
2013	}
2014
2015	if (substr($name, 0, 1) =~ /[,\.]/) {
2016	    $name = substr($name, 1, length($name) - 1);
2017	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2018	    $name = '"' . substr($name, 2, length($name) - 2);
2019	}
2020
2021	my $fmt_email = format_email($name, $address, $email_usename);
2022	push(@fmt_emails, $fmt_email);
2023    }
2024    return @fmt_emails;
2025}
2026
2027sub merge_email {
2028    my @lines;
2029    my %saw;
2030
2031    for (@_) {
2032	my ($address, $role) = @$_;
2033	if (!$saw{$address}) {
2034	    if ($output_roles) {
2035		push(@lines, "$address ($role)");
2036	    } else {
2037		push(@lines, $address);
2038	    }
2039	    $saw{$address} = 1;
2040	}
2041    }
2042
2043    return @lines;
2044}
2045
2046sub output {
2047    my (@parms) = @_;
2048
2049    if ($output_multiline) {
2050	foreach my $line (@parms) {
2051	    print("${line}\n");
2052	}
2053    } else {
2054	print(join($output_separator, @parms));
2055	print("\n");
2056    }
2057}
2058
2059my $rfc822re;
2060
2061sub make_rfc822re {
2062#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2063#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2064#   This regexp will only work on addresses which have had comments stripped
2065#   and replaced with rfc822_lwsp.
2066
2067    my $specials = '()<>@,;:\\\\".\\[\\]';
2068    my $controls = '\\000-\\037\\177';
2069
2070    my $dtext = "[^\\[\\]\\r\\\\]";
2071    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2072
2073    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2074
2075#   Use zero-width assertion to spot the limit of an atom.  A simple
2076#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2077    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2078    my $word = "(?:$atom|$quoted_string)";
2079    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2080
2081    my $sub_domain = "(?:$atom|$domain_literal)";
2082    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2083
2084    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2085
2086    my $phrase = "$word*";
2087    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2088    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2089    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2090
2091    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2092    my $address = "(?:$mailbox|$group)";
2093
2094    return "$rfc822_lwsp*$address";
2095}
2096
2097sub rfc822_strip_comments {
2098    my $s = shift;
2099#   Recursively remove comments, and replace with a single space.  The simpler
2100#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2101#   chars in atoms, for example.
2102
2103    while ($s =~ s/^((?:[^"\\]|\\.)*
2104                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2105                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2106    return $s;
2107}
2108
2109#   valid: returns true if the parameter is an RFC822 valid address
2110#
2111sub rfc822_valid {
2112    my $s = rfc822_strip_comments(shift);
2113
2114    if (!$rfc822re) {
2115        $rfc822re = make_rfc822re();
2116    }
2117
2118    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2119}
2120
2121#   validlist: In scalar context, returns true if the parameter is an RFC822
2122#              valid list of addresses.
2123#
2124#              In list context, returns an empty list on failure (an invalid
2125#              address was found); otherwise a list whose first element is the
2126#              number of addresses found and whose remaining elements are the
2127#              addresses.  This is needed to disambiguate failure (invalid)
2128#              from success with no addresses found, because an empty string is
2129#              a valid list.
2130
2131sub rfc822_validlist {
2132    my $s = rfc822_strip_comments(shift);
2133
2134    if (!$rfc822re) {
2135        $rfc822re = make_rfc822re();
2136    }
2137    # * null list items are valid according to the RFC
2138    # * the '1' business is to aid in distinguishing failure from no results
2139
2140    my @r;
2141    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2142	$s =~ m/^$rfc822_char*$/) {
2143        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2144            push(@r, $1);
2145        }
2146        return wantarray ? (scalar(@r), @r) : 1;
2147    }
2148    return wantarray ? () : 0;
2149}
2150