1#!/usr/bin/env perl 2package Porting::updateAUTHORS; 3use strict; 4use warnings; 5use Getopt::Long qw(GetOptions); 6use Pod::Usage qw(pod2usage); 7use Data::Dumper; 8use Encode qw(encode_utf8 decode_utf8 decode); 9 10# The style of this file is determined by: 11# 12# perltidy -w -ple -bbb -bbc -bbs -nolq -l=80 -noll -nola -nwls='=' \ 13# -isbc -nolc -otr -kis -ci=4 -se -sot -sct -nsbl -pt=2 -fs \ 14# -fsb='#start-no-tidy' -fse='#end-no-tidy' 15 16# Info and config for passing to git log. 17# %an: author name 18# %aN: author name (respecting .mailmap, see git-shortlog(1) or git-blame(1)) 19# %ae: author email 20# %aE: author email (respecting .mailmap, see git-shortlog(1) or git-blame(1)) 21# %cn: committer name 22# %cN: committer name (respecting .mailmap, see git-shortlog(1) or git-blame(1)) 23# %ce: committer email 24# %cE: committer email (respecting .mailmap, see git-shortlog(1) or git-blame(1)) 25# %H: commit hash 26# %h: abbreviated commit hash 27# %s: subject 28# %x00: print a byte from a hex code 29 30my %field_spec= ( 31 "an" => "author_name", 32 "aN" => "author_name_mm", 33 "ae" => "author_email", 34 "aE" => "author_email_mm", 35 "cn" => "committer_name", 36 "cN" => "committer_name_mm", 37 "ce" => "committer_email", 38 "cE" => "committer_email_mm", 39 "H" => "commit_hash", 40 "h" => "abbrev_hash", 41 "s" => "commit_subject", 42); 43 44my @field_codes= sort keys %field_spec; 45my @field_names= map { $field_spec{$_} } @field_codes; 46my $tformat= join "%x00", map { "%" . $_ } @field_codes; 47 48sub _make_name_author_info { 49 my ($author_info, $commit_info, $name_key)= @_; 50 (my $email_key= $name_key) =~ s/name/email/; 51 my $email= $commit_info->{$email_key}; 52 my $name= $commit_info->{$name_key}; 53 54 my $line= $author_info->{"email2line"}{$email} 55 // $author_info->{"name2line"}{$name}; 56 57 $line //= sprintf "%-31s<%s>", 58 $commit_info->{$name_key}, $commit_info->{$email_key}; 59 return $line; 60} 61 62sub _make_name_simple { 63 my ($commit_info, $key)= @_; 64 my $name_key= $key . "_name"; 65 my $email_key= $key . "_email"; 66 return sprintf "%s <%s>", $commit_info->{$name_key}, 67 lc($commit_info->{$email_key}); 68} 69 70sub read_commit_log { 71 my ($author_info, $mailmap_info)= @_; 72 $author_info ||= {}; 73 open my $fh, qq(git log --pretty='tformat:$tformat' |); 74 75 while (defined(my $line= <$fh>)) { 76 chomp $line; 77 $line= decode_utf8($line); 78 my $commit_info= {}; 79 @{$commit_info}{@field_names}= split /\0/, $line, 0 + @field_names; 80 81 my $author_name_mm= _make_name_author_info($author_info, $commit_info, 82 "author_name_mm"); 83 84 my $committer_name_mm= 85 _make_name_author_info($author_info, $commit_info, 86 "committer_name_mm"); 87 88 my $author_name_real= _make_name_simple($commit_info, "author"); 89 90 my $committer_name_real= _make_name_simple($commit_info, "committer"); 91 92 _check_name_mailmap( 93 $mailmap_info, $author_name_mm, $author_name_real, 94 $commit_info, "author name" 95 ); 96 _check_name_mailmap($mailmap_info, $committer_name_mm, 97 $committer_name_real, $commit_info, "committer name"); 98 99 $author_info->{"lines"}{$author_name_mm}++; 100 $author_info->{"lines"}{$committer_name_mm}++; 101 } 102 return $author_info; 103} 104 105sub read_authors { 106 my ($authors_file)= @_; 107 $authors_file ||= "AUTHORS"; 108 109 my @authors_preamble; 110 open my $in_fh, "<", $authors_file 111 or die "Failed to open for read '$authors_file': $!"; 112 while (defined(my $line= <$in_fh>)) { 113 chomp $line; 114 push @authors_preamble, $line; 115 if ($line =~ /^--/) { 116 last; 117 } 118 } 119 my %author_info; 120 while (defined(my $line= <$in_fh>)) { 121 chomp $line; 122 $line= decode_utf8($line); 123 my ($name, $email); 124 my $copy= $line; 125 $copy =~ s/\s+\z//; 126 if ($copy =~ s/<([^<>]*)>//) { 127 $email= $1; 128 } 129 elsif ($copy =~ s/\s+(\@\w+)\z//) { 130 $email= $1; 131 } 132 $copy =~ s/\s+\z//; 133 $name= $copy; 134 $email //= "unknown"; 135 $email= lc($email); 136 137 $author_info{"lines"}{$line}++; 138 $author_info{"email2line"}{$email}= $line 139 if $email and $email ne "unknown"; 140 $author_info{"name2line"}{$name}= $line 141 if $name and $name ne "unknown"; 142 $author_info{"email2name"}{ lc($email) }= $name 143 if $email 144 and $name 145 and $email ne "unknown"; 146 $author_info{"name2email"}{$name}= $email 147 if $name and $name ne "unknown"; 148 } 149 close $in_fh 150 or die "Failed to close '$authors_file': $!"; 151 return (\%author_info, \@authors_preamble); 152} 153 154sub update_authors { 155 my ($author_info, $authors_preamble, $authors_file)= @_; 156 $authors_file ||= "AUTHORS"; 157 my $authors_file_new= $authors_file . ".new"; 158 open my $out_fh, ">", $authors_file_new 159 or die "Failed to open for write '$authors_file_new': $!"; 160 binmode $out_fh; 161 foreach my $line (@$authors_preamble) { 162 print $out_fh encode_utf8($line), "\n" 163 or die "Failed to print to '$authors_file_new': $!"; 164 } 165 foreach my $author (_sorted_hash_keys($author_info->{"lines"})) { 166 next if $author =~ /^unknown/; 167 if ($author =~ s/\s*<unknown>\z//) { 168 next if $author =~ /^\w+$/; 169 } 170 print $out_fh encode_utf8($author), "\n" 171 or die "Failed to print to '$authors_file_new': $!"; 172 } 173 close $out_fh 174 or die "Failed to close '$authors_file_new': $!"; 175 rename $authors_file_new, $authors_file 176 or die "Failed to rename '$authors_file_new' to '$authors_file':$!"; 177 return 1; # ok 178} 179 180sub read_mailmap { 181 my ($mailmap_file)= @_; 182 $mailmap_file ||= ".mailmap"; 183 184 open my $in, "<", $mailmap_file 185 or die "Failed to read '$mailmap_file': $!"; 186 my %mailmap_hash; 187 my @mailmap_preamble; 188 my $line_num= 0; 189 while (defined(my $line= <$in>)) { 190 ++$line_num; 191 next unless $line =~ /\S/; 192 chomp($line); 193 $line= decode_utf8($line); 194 if ($line =~ /^#/) { 195 if (!keys %mailmap_hash) { 196 push @mailmap_preamble, $line; 197 } 198 else { 199 die encode_utf8 "Not expecting comments after header ", 200 "finished at line $line_num!\nLine: $line\n"; 201 } 202 } 203 else { 204 $mailmap_hash{$line}= $line_num; 205 } 206 } 207 close $in; 208 return \%mailmap_hash, \@mailmap_preamble; 209} 210 211# this can be used to extract data from the checkAUTHORS data 212sub merge_mailmap_with_AUTHORS_and_checkAUTHORS_data { 213 my ($mailmap_hash, $author_info)= @_; 214 require 'Porting/checkAUTHORS.pl' or die "No authors?"; 215 my ($map, $preferred_email_or_github)= 216 Porting::checkAUTHORS::generate_known_author_map(); 217 218 foreach my $old (sort keys %$preferred_email_or_github) { 219 my $new= $preferred_email_or_github->{$old}; 220 next if $old !~ /\@/ or $new !~ /\@/ or $new eq $old; 221 my $name= $author_info->{"email2name"}{$new}; 222 if ($name) { 223 my $line= "$name <$new> <$old>"; 224 $mailmap_hash->{$line}++; 225 } 226 } 227 return 1; # ok 228} 229 230sub _sorted_hash_keys { 231 my ($hash)= @_; 232 my @sorted= sort { lc($a) cmp lc($b) || $a cmp $b } keys %$hash; 233 return @sorted; 234} 235 236sub update_mailmap { 237 my ($mailmap_hash, $mailmap_preamble, $mailmap_file)= @_; 238 $mailmap_file ||= ".mailmap"; 239 240 my $mailmap_file_new= $mailmap_file . "_new"; 241 open my $out, ">", $mailmap_file_new 242 or die "Failed to write '$mailmap_file_new':$!"; 243 binmode $out; 244 foreach my $line (@$mailmap_preamble, _sorted_hash_keys($mailmap_hash),) { 245 print $out encode_utf8($line), "\n" 246 or die "Failed to print to '$mailmap_file': $!"; 247 } 248 close $out; 249 rename $mailmap_file_new, $mailmap_file 250 or die "Failed to rename '$mailmap_file_new' to '$mailmap_file':$!"; 251 return 1; # ok 252} 253 254sub parse_mailmap_hash { 255 my ($mailmap_hash)= @_; 256 my @recs; 257 foreach my $line (sort keys %$mailmap_hash) { 258 my $line_num= $mailmap_hash->{$line}; 259 $line =~ /^ \s* (?: ( [^<>]*? ) \s+ )? <([^<>]*)> 260 (?: \s+ (?: ( [^<>]*? ) \s+ )? <([^<>]*)> )? \s* \z /x 261 or die encode_utf8 "Failed to parse line num $line_num: '$line'"; 262 if (!$1 or !$2) { 263 die encode_utf8 "Both preferred name and email are mandatory ", 264 "in line num $line_num: '$line'"; 265 } 266 267 # [ preferred_name, preferred_email, other_name, other_email ] 268 push @recs, [ $1, $2, $3, $4, $line_num ]; 269 } 270 return \@recs; 271} 272 273sub _safe_set_key { 274 my ($hash, $root_key, $key, $val, $pretty_name)= @_; 275 $hash->{$root_key}{$key} //= $val; 276 my $prev= $hash->{$root_key}{$key}; 277 if ($prev ne $val) { 278 die encode_utf8 "Collision on mapping $root_key: " 279 . " '$key' maps to '$prev' and '$val'\n"; 280 } 281} 282 283my $O2P= "other2preferred"; 284my $O2PN= "other2preferred_name"; 285my $O2PE= "other2preferred_email"; 286my $P2O= "preferred2other"; 287my $N2P= "name2preferred"; 288my $E2P= "email2preferred"; 289 290my $blurb= ""; # FIXME - replace with a nice message 291 292sub _check_name_mailmap { 293 my ($mailmap_info, $auth_name, $raw_name, $commit_info, $descr)= @_; 294 my $name= $auth_name; 295 $name =~ s/<([^<>]+)>/<\L$1\E>/ 296 or $name =~ s/(\s)(\@\w+)\z/$1<\L$2\E>/ 297 or $name .= " <unknown>"; 298 299 $name =~ s/\s+/ /g; 300 301 if (!$mailmap_info->{$P2O}{$name}) { 302 warn encode_utf8 sprintf "Unknown %s '%s' in commit %s '%s'\n%s", 303 $descr, 304 $name, $commit_info->{"abbrev_hash"}, 305 $commit_info->{"commit_subject"}, 306 $blurb; 307 $mailmap_info->{add}{"$name $raw_name"}++; 308 return 0; 309 } 310 elsif (!$mailmap_info->{$P2O}{$name}{$raw_name}) { 311 $mailmap_info->{add}{"$name $raw_name"}++; 312 } 313 return 1; 314} 315 316sub check_fix_mailmap_hash { 317 my ($mailmap_hash, $authors_info)= @_; 318 my $parsed= parse_mailmap_hash($mailmap_hash); 319 my @fixed; 320 my %seen_map; 321 my %pref_groups; 322 323 # first pass through the data, do any conversions, eg, LC 324 # the email address, decode any MIME-Header style email addresses. 325 # We also correct any preferred name entries so they match what 326 # we already have in AUTHORS, and check that there aren't collisions 327 # or other issues in the data. 328 foreach my $rec (@$parsed) { 329 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec; 330 $pemail= lc($pemail); 331 $oemail= lc($oemail) if defined $oemail; 332 if ($pname =~ /=\?UTF-8\?/) { 333 $pname= decode("MIME-Header", $pname); 334 } 335 my $auth_email= $authors_info->{"name2email"}{$pname}; 336 if ($auth_email) { 337 ## this name exists in authors, so use its email data for pemail 338 $pemail= $auth_email; 339 } 340 my $auth_name= $authors_info->{"email2name"}{$pemail}; 341 if ($auth_name) { 342 ## this email exists in authors, so use its name data for pname 343 $pname= $auth_name; 344 } 345 346 # neither name nor email exist in authors. 347 if ($pname ne "unknown") { 348 if (my $email= $seen_map{"name"}{$pname}) { 349 ## we have seen this pname before, check the pemail 350 ## is consistent 351 if ($email ne $pemail) { 352 warn encode_utf8 "Inconsistent emails for name '$pname'" 353 . " at line num $line_num: keeping '$email'," 354 . " ignoring '$pemail'\n"; 355 $pemail= $email; 356 } 357 } 358 else { 359 $seen_map{"name"}{$pname}= $pemail; 360 } 361 } 362 if ($pemail ne "unknown") { 363 if (my $name= $seen_map{"email"}{$pemail}) { 364 ## we have seen this preferred_email before, check the preferred_name 365 ## is consistent 366 if ($name ne $pname) { 367 warn encode_utf8 "Inconsistent name for email '$pemail'" 368 . " at line num $line_num: keeping '$name', ignoring" 369 . " '$pname'\n"; 370 $pname= $name; 371 } 372 } 373 else { 374 $seen_map{"email"}{$pemail}= $pname; 375 } 376 } 377 378 # Build an index of "preferred name/email" to other-email, other name 379 # we use this later to remove redundant entries missing a name. 380 $pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }= 381 [ $pname, $pemail, $oname, $oemail, $line_num ]; 382 } 383 384 # this removes entries like 385 # Joe <blogs> <whatever> 386 # where there is a corresponding 387 # Joe <blogs> Joe X <blogs> 388 foreach my $pref (_sorted_hash_keys(\%pref_groups)) { 389 my $entries= $pref_groups{$pref}; 390 foreach my $email (_sorted_hash_keys($entries)) { 391 my @names= _sorted_hash_keys($entries->{$email}); 392 if ($names[0] eq "" and @names > 1) { 393 shift @names; 394 } 395 foreach my $name (@names) { 396 push @fixed, $entries->{$email}{$name}; 397 } 398 } 399 } 400 401 # final pass through the dataset, build up a database 402 # we will use later for checks and updates, and reconstruct 403 # the canonical entries. 404 my $new_mailmap_hash= {}; 405 my $mailmap_info= {}; 406 foreach my $rec (@fixed) { 407 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec; 408 my $preferred= "$pname <$pemail>"; 409 my $other; 410 if (defined $oemail) { 411 $other= $oname ? "$oname <$oemail>" : "<$oemail>"; 412 } 413 if ($other and $other ne "<unknown>") { 414 _safe_set_key($mailmap_info, $O2P, $other, $preferred); 415 _safe_set_key($mailmap_info, $O2PN, $other, $pname); 416 _safe_set_key($mailmap_info, $O2PE, $other, $pemail); 417 } 418 $mailmap_info->{$P2O}{$preferred}{$other}++; 419 if ($pname ne "unknown") { 420 _safe_set_key($mailmap_info, $N2P, $pname, $preferred); 421 } 422 if ($pemail ne "unknown") { 423 _safe_set_key($mailmap_info, $E2P, $pemail, $preferred); 424 } 425 my $line= $preferred; 426 $line .= " $other" if $other; 427 $new_mailmap_hash->{$line}= $line_num; 428 } 429 return ($new_mailmap_hash, $mailmap_info); 430} 431 432sub add_new_mailmap_entries { 433 my ($mailmap_hash, $mailmap_info, $mailmap_file)= @_; 434 435 my $mailmap_add= $mailmap_info->{add} 436 or return 0; 437 438 my $num= 0; 439 for my $new (sort keys %$mailmap_add) { 440 !$mailmap_hash->{$new}++ or next; 441 warn encode_utf8 "Updating '$mailmap_file' with: $new\n"; 442 $num++; 443 } 444 return $num; 445} 446 447sub read_and_update { 448 my ($authors_file, $mailmap_file)= @_; 449 450 # read the authors file and extract the info it contains 451 my ($author_info, $authors_preamble)= read_authors($authors_file); 452 453 # read the mailmap file. 454 my ($orig_mailmap_hash, $mailmap_preamble)= read_mailmap($mailmap_file); 455 456 # check and possibly fix the mailmap data, and build a set of precomputed 457 # datasets to work with it. 458 my ($mailmap_hash, $mailmap_info)= 459 check_fix_mailmap_hash($orig_mailmap_hash, $author_info); 460 461 # update the mailmap based on any check or fixes we just did, 462 # we always write even if we did not do any changes. 463 update_mailmap($mailmap_hash, $mailmap_preamble, $mailmap_file); 464 465 # read the commits names using git log, and compares and checks 466 # them against the data we have in authors. 467 read_commit_log($author_info, $mailmap_info); 468 469 # update the authors file with any changes, we always write, 470 # but we may not change anything 471 update_authors($author_info, $authors_preamble, $authors_file); 472 473 # check if we discovered new email data from the commits that 474 # we need to write back to disk. 475 add_new_mailmap_entries($mailmap_hash, $mailmap_info, $mailmap_file) 476 and update_mailmap($mailmap_hash, $mailmap_preamble, 477 $mailmap_file, $mailmap_info); 478 479 return undef; 480} 481 482sub main { 483 local $Data::Dumper::Sortkeys= 1; 484 my $authors_file= "AUTHORS"; 485 my $mailmap_file= ".mailmap"; 486 my $show_man= 0; 487 my $show_help= 0; 488 489 ## Parse options and print usage if there is a syntax error, 490 ## or if usage was explicitly requested. 491 GetOptions( 492 'help|?' => \$show_help, 493 'man' => \$show_man, 494 'authors_file|authors-file=s' => \$authors_file, 495 'mailmap_file|mailmap-file=s' => \$mailmap_file, 496 ) or pod2usage(2); 497 pod2usage(1) if $show_help; 498 pod2usage(-verbose => 2) if $show_man; 499 500 read_and_update($authors_file, $mailmap_file); 501 return 0; # 0 for no error - intended for exit(); 502} 503 504exit(main()) unless caller; 505 5061; 507__END__ 508 509=head1 NAME 510 511Porting/updateAUTHORS.pl - Automatically update AUTHORS and .mailmap 512based on commit data. 513 514=head1 SYNOPSIS 515 516Porting/updateAUTHORS.pl 517 518 Options: 519 --help brief help message 520 --man full documentation 521 --authors-file=FILE override default location of AUTHORS 522 --mailmap-file=FILE override default location of .mailmap 523 524=head1 OPTIONS 525 526=over 4 527 528=item --help 529 530Print a brief help message and exits. 531 532=item --man 533 534Prints the manual page and exits. 535 536=item --authors-file=FILE 537 538=item --authors_file=FILE 539 540Override the default location of the authors file, which is "AUTHORS" in 541the current directory. 542 543=item --mailmap-file=FILE 544 545=item --mailmap_file=FILE 546 547Override the default location of the mailmap file, which is ".mailmap" 548in the current directory. 549 550=back 551 552=head1 DESCRIPTION 553 554This program will automatically manage updates to the AUTHORS file and 555.mailmap file based on the data in our commits and the data in the files 556themselves. It uses no other sources of data. Expects to be run from 557the root a git repo of perl. 558 559In simple, execute the script and it will either die with a helpful 560message or it will update the files as necessary, possibly not at all if 561there is no need to do so. Note it will actually rewrite the files at 562least once, but it may not actually make any changes to their content. 563Thus to use the script is currently required that the files are 564modifiable. 565 566Review the changes it makes to make sure they are sane. If they are 567commit. If they are not then update the AUTHORS or .mailmap files as is 568appropriate and run the tool again. Typically you shouldn't need to do 569either unless you are changing the default name or email for a user. For 570instance if a person currently listed in the AUTHORS file whishes to 571change their preferred name or email then change it in the AUTHORS file 572and run the script again. I am not sure when you might need to directly 573modify .mailmap, usually modifying the AUTHORS file should suffice. 574 575=head1 FUNCTIONS 576 577Note that the file can also be used as a package. If you require the 578file then you can access the functions located within the package 579C<Porting::updateAUTHORS>. These are as follows: 580 581=over 4 582 583=item add_new_mailmap_entries($mailmap_hash, $mailmap_info, $mailmap_file) 584 585If any additions were identified while reading the commits this will 586inject them into the mailmap_hash so they can be written out. Returns a 587count of additions found. 588 589=item check_fix_mailmap_hash($mailmap_hash, $authors_info) 590 591Analyzes the data contained the in the .mailmap file and applies any 592automated fixes which are required and which it can automatically 593perform. Returns a hash of adjusted entries and a hash with additional 594metadata about the mailmap entries. 595 596=item main() 597 598This implements the command line version of this module, handle command 599line options, etc. 600 601=item merge_mailmap_with_AUTHORS_and_checkAUTHORS_data 602 603This is a utility function that combines data from this tool with data 604contained in F<Porting/checkAUTHORS.pl> it is not used directly, but was 605used to cleanup and generate the current version of the .mailmap file. 606 607=item parse_mailmap_hash($mailmap_hash) 608 609Takes a mailmap_hash and parses it and returns it as an array of array 610records with the contents: 611 612 [ $preferred_name, $preferred_email, 613 $other_name, $other_email, 614 $line_num ] 615 616=item read_and_update($authors_file, $mailmap_file) 617 618Wraps the other functions in this library and implements the logic and 619intent of this tool. Takes two arguments, the authors file name, and the 620mailmap file name. Returns nothing but may modify the AUTHORS file 621or the .mailmap file. Requires that both files are editable. 622 623=item read_commit_log($authors_info, $mailmap_info) 624 625Read the commit log and find any new names it contains. 626 627=item read_authors($authors_file) 628 629Read the AUTHORS file and return data about it. 630 631=item read_mailmap($mailmap_file) 632 633Read the .mailmap file and return data about it. 634 635=item update_authors($authors_info, $authors_preamble, $authors_file) 636 637Write out an updated AUTHORS file. This is done atomically 638using a rename, we will not leave a half modified file in 639the repo. 640 641=item update_mailmap($mm_hash, $mm_preamble, $mailmap_file, $mm_info) 642 643Write out an updated .mailmap file. This is done atomically 644using a rename, we will not leave a half modified file in 645the repo. 646 647=back 648 649=head1 TODO 650 651More documentation and testing. 652 653=head1 SEE ALSO 654 655F<Porting/checkAUTHORS.pl> 656 657=head1 AUTHOR 658 659Yves Orton <demerphq@gmail.com> 660 661=cut 662