1#!/bin/sh
2exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
3#!perl -w
4
5
6##############################################################
7###                                                        ###
8### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
9###                                                        ###
10##############################################################
11
12## $Revision: 1.1 $
13## $Date: 2005/10/29 14:58:02 $
14## $Author: suzhe $
15##
16
17use strict;
18
19use File::Basename qw( fileparse );
20use Getopt::Long   qw( GetOptions );
21use Text::Wrap     qw( );
22use Time::Local    qw( timegm );
23use User::pwent    qw( getpwnam );
24
25# The Plan:
26#
27# Read in the logs for multiple files, spit out a nice ChangeLog that
28# mirrors the information entered during `cvs commit'.
29#
30# The problem presents some challenges. In an ideal world, we could
31# detect files with the same author, log message, and checkin time --
32# each <filelist, author, time, logmessage> would be a changelog entry.
33# We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
34# so checkins can span a range of times.  Also, the directory structure
35# could be hierarchical.
36#
37# Another question is whether we really want to have the ChangeLog
38# exactly reflect commits. An author could issue two related commits,
39# with different log entries, reflecting a single logical change to the
40# source. GNU style ChangeLogs group these under a single author/date.
41# We try to do the same.
42#
43# So, we parse the output of `cvs log', storing log messages in a
44# multilevel hash that stores the mapping:
45#   directory => author => time => message => filelist
46# As we go, we notice "nearby" commit times and store them together
47# (i.e., under the same timestamp), so they appear in the same log
48# entry.
49#
50# When we've read all the logs, we twist this mapping into
51# a time => author => message => filelist mapping for each directory.
52#
53# If we're not using the `--distributed' flag, the directory is always
54# considered to be `./', even as descend into subdirectories.
55
56# Call Tree
57
58# name                         number of lines (10.xii.03)
59# parse_options                         192
60# derive_changelog                       13
61# +-maybe_grab_accumulation_date         38
62# +-read_changelog                      277
63#   +-maybe_read_user_map_file           94
64#     +-run_ext                           9
65#   +-read_file_path                     29
66#   +-read_symbolic_name                 43
67#   +-read_revision                      49
68#   +-read_date_author_and_state         25
69#     +-parse_date_author_and_state      20
70#   +-read_branches                      36
71# +-output_changelog                    424
72#   +-pretty_file_list                  290
73#     +-common_path_prefix               35
74#   +-preprocess_msg_text                30
75#     +-min                               1
76#   +-mywrap                             16
77#   +-last_line_len                       5
78#   +-wrap_log_entry                    177
79#
80# Utilities
81#
82# xml_escape                              6
83# slurp_file                             11
84# debug                                   5
85# version                                 2
86# usage                                 142
87
88# -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
89#
90# Note about a bug-slash-opportunity:
91# -----------------------------------
92#
93# There's a bug in Text::Wrap, which affects cvs2cl.  This script
94# reveals it:
95#
96#   #!/usr/bin/perl -w
97#
98#   use Text::Wrap;
99#
100#   my $test_text =
101#   "This script demonstrates a bug in Text::Wrap.  The very long line
102#   following this paragraph will be relocated relative to the surrounding
103#   text:
104#
105#   ====================================================================
106#
107#   See?  When the bug happens, we'll get the line of equal signs below
108#   this paragraph, even though it should be above.";
109#
110#
111#   # Print out the test text with no wrapping:
112#   print "$test_text";
113#   print "\n";
114#   print "\n";
115#
116#   # Now print it out wrapped, and see the bug:
117#   print wrap ("\t", "        ", "$test_text");
118#   print "\n";
119#   print "\n";
120#
121# If the line of equal signs were one shorter, then the bug doesn't
122# happen.  Interesting.
123#
124# Anyway, rather than fix this in Text::Wrap, we might as well write a
125# new wrap() which has the following much-needed features:
126#
127# * initial indentation, like current Text::Wrap()
128# * subsequent line indentation, like current Text::Wrap()
129# * user chooses among: force-break long words, leave them alone, or die()?
130# * preserve existing indentation: chopped chunks from an indented line
131#   are indented by same (like this line, not counting the asterisk!)
132# * optional list of things to preserve on line starts, default ">"
133#
134# Note that the last two are essentially the same concept, so unify in
135# implementation and give a good interface to controlling them.
136#
137# And how about:
138#
139# Optionally, when encounter a line pre-indented by same as previous
140# line, then strip the newline and refill, but indent by the same.
141# Yeah...
142
143# Globals --------------------------------------------------------------------
144
145# In case we have to print it out:
146my $VERSION = '$Revision: 1.1 $';
147$VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
148
149## Vars set by options:
150
151# Print debugging messages?
152my $Debug = 0;
153
154# Just show version and exit?
155my $Print_Version = 0;
156
157# Just print usage message and exit?
158my $Print_Usage = 0;
159
160# What file should we generate (defaults to "ChangeLog")?
161my $Log_File_Name = "ChangeLog";
162
163# Grab most recent entry date from existing ChangeLog file, just add
164# to that ChangeLog.
165my $Cumulative = 0;
166
167# `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
168# as it guarantees at least one entry in the update changelog, which means
169# that there will always be a date to extract for the next update.  The repeat
170# entry can be removed in postprocessing, if necessary.
171
172# MJP 2003-08-02
173# I don't think this actually does anything useful
174my $Update = 0;
175
176# Expand usernames to email addresses based on a map file?
177my $User_Map_File = '';
178my $User_Passwd_File;
179my $Mail_Domain;
180
181# Output log in chronological order? [default is reverse chronological order]
182my $Chronological_Order = 0;
183
184# Grab user details via gecos
185my $Gecos = 0;
186
187# User domain for gecos email addresses
188my $Domain;
189
190# Output to a file or to stdout?
191my $Output_To_Stdout = 0;
192
193# Eliminate empty log messages?
194my $Prune_Empty_Msgs = 0;
195
196# Tags of which not to output
197my %ignore_tags;
198
199# Show only revisions with Tags
200my %show_tags;
201
202# Don't call Text::Wrap on the body of the message
203my $No_Wrap = 0;
204
205# Indentation of log messages
206my $Indent = "\t";
207
208# Don't do any pretty print processing
209my $Summary = 0;
210
211# Separates header from log message.  Code assumes it is either " " or
212# "\n\n", so if there's ever an option to set it to something else,
213# make sure to go through all conditionals that use this var.
214my $After_Header = " ";
215
216# XML Encoding
217my $XML_Encoding = '';
218
219# Format more for programs than for humans.
220my $XML_Output = 0;
221my $No_XML_Namespace = 0;
222my $No_XML_ISO_Date = 0;
223
224# Do some special tweaks for log data that was written in FSF
225# ChangeLog style.
226my $FSF_Style = 0;
227
228# Show times in UTC instead of local time
229my $UTC_Times = 0;
230
231# Show times in output?
232my $Show_Times = 1;
233
234# Show day of week in output?
235my $Show_Day_Of_Week = 0;
236
237# Show revision numbers in output?
238my $Show_Revisions = 0;
239
240# Show dead files in output?
241my $Show_Dead = 0;
242
243# Hide dead trunk files which were created as a result of additions on a
244# branch?
245my $Hide_Branch_Additions = 1;
246
247# Show tags (symbolic names) in output?
248my $Show_Tags = 0;
249
250# Show tags separately in output?
251my $Show_Tag_Dates = 0;
252
253# Show branches by symbolic name in output?
254my $Show_Branches = 0;
255
256# Show only revisions on these branches or their ancestors.
257my @Follow_Branches;
258# Show only revisions on these branches or their ancestors; ignore descendent
259# branches.
260my @Follow_Only;
261
262# Don't bother with files matching this regexp.
263my @Ignore_Files;
264
265# How exactly we match entries.  We definitely want "o",
266# and user might add "i" by using --case-insensitive option.
267my $Case_Insensitive = 0;
268
269# Maybe only show log messages matching a certain regular expression.
270my $Regexp_Gate = '';
271
272# Pass this global option string along to cvs, to the left of `log':
273my $Global_Opts = '';
274
275# Pass this option string along to the cvs log subcommand:
276my $Command_Opts = '';
277
278# Read log output from stdin instead of invoking cvs log?
279my $Input_From_Stdin = 0;
280
281# Don't show filenames in output.
282my $Hide_Filenames = 0;
283
284# Don't shorten directory names from filenames.
285my $Common_Dir = 1;
286
287# Max checkin duration. CVS checkin is not atomic, so we may have checkin
288# times that span a range of time. We assume that checkins will last no
289# longer than $Max_Checkin_Duration seconds, and that similarly, no
290# checkins will happen from the same users with the same message less
291# than $Max_Checkin_Duration seconds apart.
292my $Max_Checkin_Duration = 180;
293
294# What to put at the front of [each] ChangeLog.
295my $ChangeLog_Header = '';
296
297# Whether to enable 'delta' mode, and for what start/end tags.
298my $Delta_Mode = 0;
299my $Delta_From = '';
300my $Delta_To = '';
301
302my $TestCode;
303
304# Whether to parse filenames from the RCS filename, and if so what
305# prefix to strip.
306my $RCS_Root;
307
308# Whether to output information on the # of lines added and removed
309# by each file modification.
310my $Show_Lines_Modified = 0;
311
312## end vars set by options.
313
314# latest observed times for the start/end tags in delta mode
315my $Delta_StartTime = 0;
316my $Delta_EndTime = 0;
317
318my $No_Ancestors = 0;
319
320my $No_Extra_Indent = 0;
321
322my $GroupWithinDate = 0;
323
324# ----------------------------------------------------------------------------
325
326package CVS::Utils::ChangeLog::EntrySet;
327
328sub new {
329  my $class = shift;
330  my %self;
331  bless \%self, $class;
332}
333
334# -------------------------------------
335
336sub output_changelog {
337  my $output_type = $XML_Output ? 'XML' : 'Text';
338  my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
339  my $output = $output_class->new(follow_branches => \@Follow_Branches,
340                                  follow_only     => \@Follow_Only,
341                                  ignore_tags     => \%ignore_tags,
342                                  show_tags       => \%show_tags,
343                                 );
344  $output->output_changelog(@_);
345}
346
347# -------------------------------------
348
349sub add_fileentry {
350  my ($self, $file_full_path, $time, $revision, $state, $lines,
351      $branch_names, $branch_roots, $branch_numbers,
352      $symbolic_names, $author, $msg_txt) = @_;
353
354      my $qunk =
355        CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
356                                              $state, $lines,
357                                              $branch_names, $branch_roots,
358                                              $branch_numbers,
359                                              $symbolic_names);
360
361      # We might be including revision numbers and/or tags and/or
362      # branch names in the output.  Most of the code from here to
363      # loop-end deals with organizing these in qunk.
364
365      unless ( $Hide_Branch_Additions
366               and
367               $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
368        # Add this file to the list
369        # (We use many spoonfuls of autovivication magic. Hashes and arrays
370        # will spring into existence if they aren't there already.)
371
372        &main::debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");
373
374        # Store with the files in this commit.  Later we'll loop through
375        # again, making sure that revisions with the same log message
376        # and nearby commit times are grouped together as one commit.
377        $self->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
378          CVS::Utils::ChangeLog::Message->new($msg_txt)
379              unless exists $self->{$qunk->dir_key}{$author}{$time}{$msg_txt};
380        $self->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
381      }
382
383}
384
385# ----------------------------------------------------------------------------
386
387package CVS::Utils::ChangeLog::EntrySet::Output::Text;
388
389use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
390
391use File::Basename qw( fileparse );
392
393sub new {
394  my $class = shift;
395  my $self = $class->SUPER::new(@_);
396}
397
398# -------------------------------------
399
400sub wday {
401  my $self = shift; my $class = ref $self;
402  my ($wday) = @_;
403
404  return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
405}
406
407# -------------------------------------
408
409sub header_line {
410  my $self = shift;
411  my ($time, $author, $lastdate) = @_;
412
413  my $header_line = '';
414
415  my (undef,$min,$hour,$mday,$mon,$year,$wday)
416    = $UTC_Times ? gmtime($time) : localtime($time);
417
418  my $date = $self->fdatetime($time);
419
420  if ($Show_Times) {
421    $header_line =
422      sprintf "%s  %s\n\n", $date, $author;
423  } else {
424    if ( ! defined $lastdate or $date ne $lastdate or ! $GroupWithinDate ) {
425      if ( $GroupWithinDate ) {
426        $header_line = "$date\n\n";
427      } else {
428        $header_line = "$date  $author\n\n";
429      }
430    } else {
431      $header_line = '';
432    }
433  }
434}
435
436# -------------------------------------
437
438sub preprocess_msg_text {
439  my $self = shift;
440  my ($text) = @_;
441
442  $text = $self->SUPER::preprocess_msg_text($text);
443
444  unless ( $No_Wrap ) {
445    # Strip off lone newlines, but only for lines that don't begin with
446    # whitespace or a mail-quoting character, since we want to preserve
447    # that kind of formatting.  Also don't strip newlines that follow a
448    # period; we handle those specially next.  And don't strip
449    # newlines that precede an open paren.
450    1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
451
452    # If a newline follows a period, make sure that when we bring up the
453    # bottom sentence, it begins with two spaces.
454    1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;
455  }
456
457  return $text;
458}
459
460# -------------------------------------
461
462# Here we take a bunch of qunks and convert them into printed
463# summary that will include all the information the user asked for.
464sub pretty_file_list {
465  my $self = shift;
466
467  return ''
468    if $Hide_Filenames;
469
470  my $qunksref = shift;
471
472  my @filenames;
473  my $beauty = '';          # The accumulating header string for this entry.
474  my %non_unanimous_tags;   # Tags found in a proper subset of qunks
475  my %unanimous_tags;       # Tags found in all qunks
476  my %all_branches;         # Branches found in any qunk
477  my $fbegun = 0;           # Did we begin printing filenames yet?
478
479  my ($common_dir, $qunkrefs) =
480    $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
481
482  my @qunkrefs = @$qunkrefs;
483
484  # Not XML output, so complexly compactify for chordate consumption.  At this
485  # point we have enough global information about all the qunks to organize
486  # them non-redundantly for output.
487
488  if ($common_dir) {
489    # Note that $common_dir still has its trailing slash
490    $beauty .= "$common_dir: ";
491  }
492
493  if ($Show_Branches)
494  {
495    # For trailing revision numbers.
496    my @brevisions;
497
498    foreach my $branch (keys (%all_branches))
499    {
500      foreach my $qunkref (@qunkrefs)
501      {
502        if ((defined ($qunkref->branch))
503            and ($qunkref->branch eq $branch))
504        {
505          if ($fbegun) {
506            # kff todo: comma-delimited in XML too?  Sure.
507            $beauty .= ", ";
508          }
509          else {
510            $fbegun = 1;
511          }
512          my $fname = substr ($qunkref->filename, length ($common_dir));
513          $beauty .= $fname;
514          $qunkref->{'printed'} = 1;  # Just setting a mark bit, basically
515
516          if ( $Show_Tags and defined $qunkref->tags ) {
517            my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
518
519            if (@tags) {
520              $beauty .= " (tags: ";
521              $beauty .= join (', ', @tags);
522              $beauty .= ")";
523            }
524          }
525
526          if ($Show_Revisions) {
527            # Collect the revision numbers' last components, but don't
528            # print them -- they'll get printed with the branch name
529            # later.
530            $qunkref->revision =~ /.+\.([\d]+)$/;
531            push (@brevisions, $1);
532
533            # todo: we're still collecting branch roots, but we're not
534            # showing them anywhere.  If we do show them, it would be
535            # nifty to just call them revision "0" on a the branch.
536            # Yeah, that's the ticket.
537          }
538        }
539      }
540      $beauty .= " ($branch";
541      if (@brevisions) {
542        if ((scalar (@brevisions)) > 1) {
543          $beauty .= ".[";
544          $beauty .= (join (',', @brevisions));
545          $beauty .= "]";
546        }
547        else {
548          # Square brackets are spurious here, since there's no range to
549          # encapsulate
550          $beauty .= ".$brevisions[0]";
551        }
552      }
553      $beauty .= ")";
554    }
555  }
556
557  # Okay; any qunks that were done according to branch are taken care
558  # of, and marked as printed.  Now print everyone else.
559
560  my %fileinfo_printed;
561  foreach my $qunkref (@qunkrefs)
562  {
563    next if (defined ($qunkref->{'printed'}));   # skip if already printed
564
565    my $b = substr ($qunkref->filename, length ($common_dir));
566    # todo: Shlomo's change was this:
567    # $beauty .= substr ($qunkref->filename,
568    #              (($common_dir eq "./") ? '' : length ($common_dir)));
569    $qunkref->{'printed'} = 1;  # Set a mark bit.
570
571    if ($Show_Revisions || $Show_Tags || $Show_Dead)
572    {
573      my $started_addendum = 0;
574
575      if ($Show_Revisions) {
576        $started_addendum = 1;
577        $b .= " (";
578        $b .= $qunkref->revision;
579      }
580      if ($Show_Dead && $qunkref->state =~ /dead/)
581      {
582        # Deliberately not using $started_addendum. Keeping it simple.
583        $b .= "[DEAD]";
584      }
585      if ($Show_Tags && (defined $qunkref->tags)) {
586        my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
587        if ((scalar (@tags)) > 0) {
588          if ($started_addendum) {
589            $b .= ", ";
590          }
591          else {
592            $b .= " (tags: ";
593          }
594          $b .= join (', ', @tags);
595          $started_addendum = 1;
596        }
597      }
598      if ($started_addendum) {
599        $b .= ")";
600      }
601    }
602
603    unless ( exists $fileinfo_printed{$b} ) {
604      if ($fbegun) {
605        $beauty .= ", ";
606      } else {
607        $fbegun = 1;
608      }
609      $beauty .= $b, $fileinfo_printed{$b} = 1;
610    }
611  }
612
613  # Unanimous tags always come last.
614  if ($Show_Tags && %unanimous_tags)
615  {
616    $beauty .= " (utags: ";
617    $beauty .= join (', ', sort keys (%unanimous_tags));
618    $beauty .= ")";
619  }
620
621  # todo: still have to take care of branch_roots?
622
623  $beauty = "$beauty:";
624
625  return $beauty;
626}
627
628# -------------------------------------
629
630sub output_tagdate {
631  my $self = shift;
632  my ($fh, $time, $tag) = @_;
633
634  my $fdatetime = $self->fdatetime($time);
635  print $fh "$fdatetime  tag $tag\n\n";
636  return;
637}
638
639# -------------------------------------
640
641sub format_body {
642  my $self = shift;
643  my ($msg, $files, $qunklist) = @_;
644
645  my $body;
646
647  if ( $No_Wrap and ! $Summary ) {
648    $msg = $self->preprocess_msg_text($msg);
649    $files = $self->mywrap("\t", "\t  ", "* $files");
650    $msg =~ s/\n(.+)/\n$Indent$1/g;
651    unless ($After_Header eq " ") {
652      $msg =~ s/^(.+)/$Indent$1/g;
653    }
654    if ( $Hide_Filenames ) {
655      $body = $After_Header . $msg;
656    } else {
657      $body = $files . $After_Header . $msg;
658    }
659  } elsif ( $Summary ) {
660    my ($filelist, $qunk);
661    my (@DeletedQunks, @AddedQunks, @ChangedQunks);
662
663    $msg = $self->preprocess_msg_text($msg);
664    #
665    #     Sort the files (qunks) according to the operation that was
666    # performed.  Files which were added have no line change
667    # indicator, whereas deleted files have state dead.
668    #
669    foreach $qunk ( @$qunklist ) {
670      if ( "dead" eq $qunk->state) {
671        push @DeletedQunks, $qunk;
672      } elsif ( ! defined $qunk->lines ) {
673        push @AddedQunks, $qunk;
674      } else {
675        push @ChangedQunks, $qunk;
676      }
677    }
678    #
679    #     The qunks list was  originally in tree search order.  Let's
680    # get that back.  The lists, if they exist, will be reversed upon
681    # processing.
682    #
683
684    #
685    #     Now write the three sections onto $filelist
686    #
687    if ( @DeletedQunks ) {
688      $filelist .= "\tDeleted:\n";
689      foreach $qunk ( @DeletedQunks ) {
690        $filelist .= "\t\t" . $qunk->filename;
691        $filelist .= " (" . $qunk->revision . ")";
692        $filelist .= "\n";
693      }
694      undef @DeletedQunks;
695    }
696
697    if ( @AddedQunks ) {
698      $filelist .= "\tAdded:\n";
699      foreach $qunk (@AddedQunks) {
700        $filelist .= "\t\t" . $qunk->filename;
701        $filelist .= " (" . $qunk->revision . ")";
702        $filelist .= "\n";
703      }
704      undef @AddedQunks ;
705    }
706
707    if ( @ChangedQunks ) {
708      $filelist .= "\tChanged:\n";
709      foreach $qunk (@ChangedQunks) {
710        $filelist .= "\t\t" . $qunk->filename;
711        $filelist .= " (" . $qunk->revision . ")";
712        $filelist .= ", \"" . $qunk->state . "\"";
713        $filelist .= ", lines: " . $qunk->lines;
714        $filelist .= "\n";
715      }
716      undef @ChangedQunks;
717    }
718
719    chomp $filelist;
720
721    if ( $Hide_Filenames ) {
722      $filelist = '';
723    }
724
725    $msg =~ s/\n(.*)/\n$Indent$1/g;
726    unless ( $After_Header eq " " or $FSF_Style ) {
727      $msg =~ s/^(.*)/$Indent$1/g;
728    }
729
730    unless ( $No_Wrap ) {
731      if ( $FSF_Style ) {
732        $msg = $self->wrap_log_entry($msg, '', 69, 69);
733        chomp($msg);
734        chomp($msg);
735      } else {
736        $msg = $self->mywrap('', $Indent, "$msg");
737        $msg =~ s/[ \t]+\n/\n/g;
738      }
739    }
740
741    $body = $filelist . $After_Header . $msg;
742  } else {  # do wrapping, either FSF-style or regular
743    my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent  ";
744
745    if ( $FSF_Style ) {
746      $files = $self->mywrap($Indent, $latter_wrap, "* $files");
747
748      my $files_last_line_len = 0;
749      if ( $After_Header eq " " ) {
750        $files_last_line_len = $self->last_line_len($files);
751        $files_last_line_len += 1;  # for $After_Header
752      }
753
754      $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
755      $body = $files . $After_Header . $msg;
756    } else {  # not FSF-style
757      $msg = $self->preprocess_msg_text($msg);
758      $body = $files . $After_Header . $msg;
759      $body = $self->mywrap($Indent, $latter_wrap, "* $body");
760      $body =~ s/[ \t]+\n/\n/g;
761    }
762  }
763
764  return $body;
765}
766
767# ----------------------------------------------------------------------------
768
769package CVS::Utils::ChangeLog::EntrySet::Output::XML;
770
771use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
772
773use File::Basename qw( fileparse );
774
775sub new {
776  my $class = shift;
777  my $self = $class->SUPER::new(@_);
778}
779
780# -------------------------------------
781
782sub header_line {
783  my $self = shift;
784  my ($time, $author, $lastdate) = @_;
785
786  my $header_line = '';
787
788  my $isoDate;
789
790  my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
791
792  # Ideally, this would honor $UTC_Times and use +HH:MM syntax
793  $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
794                     $y + 1900, $m + 1, $d, $H, $M, $S);
795
796  my (undef,$min,$hour,$mday,$mon,$year,$wday)
797    = $UTC_Times ? gmtime($time) : localtime($time);
798
799  my $date = $self->fdatetime($time);
800  $wday = $self->wday($wday);
801
802  $header_line =
803    sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
804             $year+1900, $mon+1, $mday, $hour, $min);
805  $header_line .= "<isoDate>$isoDate</isoDate>\n"
806    unless $No_XML_ISO_Date;
807  $header_line .= sprintf("<author>%s</author>\n" , $author);
808}
809
810# -------------------------------------
811
812sub wday {
813  my $self = shift; my $class = ref $self;
814  my ($wday) = @_;
815
816  return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
817}
818
819# -------------------------------------
820
821sub escape {
822  my $self = shift;
823
824  my $txt = shift;
825  $txt =~ s/&/&amp;/g;
826  $txt =~ s/</&lt;/g;
827  $txt =~ s/>/&gt;/g;
828  return $txt;
829}
830
831# -------------------------------------
832
833sub output_header {
834  my $self = shift;
835  my ($fh) = @_;
836
837  my $encoding    =
838    length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
839  my $version     = 'version="1.0"';
840  my $declaration =
841    sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
842  my $root        =
843    $No_XML_Namespace ?
844      '<changelog>'     :
845        '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
846  print $fh "$declaration\n\n$root\n\n";
847}
848
849# -------------------------------------
850
851sub output_footer {
852  my $self = shift;
853  my ($fh) = @_;
854
855  print $fh "</changelog>\n";
856}
857
858# -------------------------------------
859
860sub preprocess_msg_text {
861  my $self = shift;
862  my ($text) = @_;
863
864  $text = $self->SUPER::preprocess_msg_text($text);
865
866  $text = $self->escape($text);
867  chomp $text;
868  $text = "<msg>${text}</msg>\n";
869
870  return $text;
871}
872
873# -------------------------------------
874
875# Here we take a bunch of qunks and convert them into a printed
876# summary that will include all the information the user asked for.
877sub pretty_file_list {
878  my $self = shift;
879  my ($qunksref) = @_;
880
881  my $beauty = '';          # The accumulating header string for this entry.
882  my %non_unanimous_tags;   # Tags found in a proper subset of qunks
883  my %unanimous_tags;       # Tags found in all qunks
884  my %all_branches;         # Branches found in any qunk
885  my $fbegun = 0;           # Did we begin printing filenames yet?
886
887  my ($common_dir, $qunkrefs) =
888    $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
889      $qunksref);
890
891  my @qunkrefs = @$qunkrefs;
892
893  # If outputting XML, then our task is pretty simple, because we
894  # don't have to detect common dir, common tags, branch prefixing,
895  # etc.  We just output exactly what we have, and don't worry about
896  # redundancy or readability.
897
898  foreach my $qunkref (@qunkrefs)
899  {
900    my $filename    = $qunkref->filename;
901    my $state       = $qunkref->state;
902    my $revision    = $qunkref->revision;
903    my $tags        = $qunkref->tags;
904    my $branch      = $qunkref->branch;
905    my $branchroots = $qunkref->roots;
906    my $lines       = $qunkref->lines;
907
908    $filename = $self->escape($filename);   # probably paranoia
909    $revision = $self->escape($revision);   # definitely paranoia
910
911    $beauty .= "<file>\n";
912    $beauty .= "<name>${filename}</name>\n";
913    $beauty .= "<cvsstate>${state}</cvsstate>\n";
914    $beauty .= "<revision>${revision}</revision>\n";
915
916    if ($Show_Lines_Modified
917        && $lines && $lines =~ m/\+(\d+)\s+-(\d+)/) {
918        $beauty .= "<linesadded>$1</linesadded>\n";
919        $beauty .= "<linesremoved>$2</linesremoved>\n";
920    }
921
922    if ($branch) {
923      $branch   = $self->escape($branch);     # more paranoia
924      $beauty .= "<branch>${branch}</branch>\n";
925    }
926    foreach my $tag (@$tags) {
927      $tag = $self->escape($tag);  # by now you're used to the paranoia
928      $beauty .= "<tag>${tag}</tag>\n";
929    }
930    foreach my $root (@$branchroots) {
931      $root = $self->escape($root);  # which is good, because it will continue
932      $beauty .= "<branchroot>${root}</branchroot>\n";
933    }
934    $beauty .= "</file>\n";
935  }
936
937  # Theoretically, we could go home now.  But as long as we're here,
938  # let's print out the common_dir and utags, as a convenience to
939  # the receiver (after all, earlier code calculated that stuff
940  # anyway, so we might as well take advantage of it).
941
942  if ((scalar (keys (%unanimous_tags))) > 1) {
943    foreach my $utag ((keys (%unanimous_tags))) {
944      $utag = $self->escape($utag);   # the usual paranoia
945      $beauty .= "<utag>${utag}</utag>\n";
946    }
947  }
948  if ($common_dir) {
949    $common_dir = $self->escape($common_dir);
950    $beauty .= "<commondir>${common_dir}</commondir>\n";
951  }
952
953  # That's enough for XML, time to go home:
954  return $beauty;
955}
956
957# -------------------------------------
958
959sub output_tagdate {
960  my $self = shift;
961  my ($fh, $time, $tag) = @_;
962
963  my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
964
965  # Ideally, this would honor $UTC_Times and use +HH:MM syntax
966  my $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
967                       $y + 1900, $m + 1, $d, $H, $M, $S);
968
969  print $fh "<tagdate>\n";
970  print $fh "<tagisodate>$isoDate</tagisodate>\n";
971  print $fh "<tagdatetag>$tag</tagdatetag>\n";
972  print $fh "</tagdate>\n\n";
973  return;
974}
975
976# -------------------------------------
977
978sub output_entry {
979  my $self = shift;
980  my ($fh, $entry) = @_;
981  print $fh "<entry>\n$entry</entry>\n\n";
982}
983
984# -------------------------------------
985
986sub format_body {
987  my $self = shift;
988  my ($msg, $files, $qunklist) = @_;
989
990  $msg = $self->preprocess_msg_text($msg);
991  return $files . $msg;
992}
993
994# ----------------------------------------------------------------------------
995
996package CVS::Utils::ChangeLog::EntrySet::Output;
997
998use Carp           qw( croak );
999use File::Basename qw( fileparse );
1000
1001# Class Utility Functions -------------
1002
1003{ # form closure
1004
1005my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
1006sub weekday_en {
1007  my $class = shift;
1008  return $weekdays[$_[0]];
1009}
1010
1011}
1012
1013# -------------------------------------
1014
1015sub new {
1016  my ($proto, %args) = @_;
1017  my $class = ref $proto || $proto;
1018
1019  my $follow_branches = delete $args{follow_branches};
1020  my $follow_only     = delete $args{follow_only};
1021  my $ignore_tags     = delete $args{ignore_tags};
1022  my $show_tags       = delete $args{show_tags};
1023  die "Unrecognized arg to EntrySet::Output::new: '$_'\n"
1024    for keys %args;
1025
1026  bless +{follow_branches => $follow_branches,
1027          follow_only     => $follow_only,
1028          show_tags       => $show_tags,
1029          ignore_tags     => $ignore_tags,
1030         }, $class;
1031}
1032
1033# Abstract Subrs ----------------------
1034
1035sub wday               { croak "Whoops.  Abtract method call (wday).\n" }
1036sub pretty_file_list   { croak "Whoops.  Abtract method call (pretty_file_list).\n" }
1037sub output_tagdate     { croak "Whoops.  Abtract method call (output_tagdate).\n" }
1038sub header_line        { croak "Whoops.  Abtract method call (header_line).\n" }
1039
1040# Instance Subrs ----------------------
1041
1042sub output_header { }
1043
1044# -------------------------------------
1045
1046sub output_entry {
1047  my $self = shift;
1048  my ($fh, $entry) = @_;
1049  print $fh "$entry\n";
1050}
1051
1052# -------------------------------------
1053
1054sub output_footer { }
1055
1056# -------------------------------------
1057
1058sub escape { return $_[1] }
1059
1060# -------------------------------------
1061
1062sub _revision_is_wanted {
1063  my ($self, $qunk) = @_;
1064
1065  my ($revision, $branch_numbers) = @{$qunk}{qw( revision branch_numbers )};
1066  my $follow_branches = $self->{follow_branches};
1067  my $follow_only     = $self->{follow_only};
1068
1069  for my $ignore_tag (keys %{$self->{ignore_tags}}) {
1070    return
1071      if defined $qunk->{tags} and grep $_ eq $ignore_tag, @{$qunk->{tags}};
1072  }
1073
1074  if ( keys %{$self->{show_tags}} ) {
1075    for my $show_tag (keys %{$self->{show_tags}}) {
1076      return
1077        if ! defined $qunk->{tags} or ! grep $_ eq $show_tag, @{$qunk->{tags}};
1078    }
1079  }
1080
1081  return 1
1082    unless @$follow_branches + @$follow_only; # no follow is follow all
1083
1084  for my $x (map([$_, 1], @$follow_branches),
1085             map([$_, 0], @$follow_only    )) {
1086    my ($branch, $followsub) = @$x;
1087
1088    # Special case for following trunk revisions
1089    return 1
1090      if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;
1091
1092    if ( my $branch_number = $branch_numbers->{$branch} ) {
1093      # Are we on one of the follow branches or an ancestor of same?
1094
1095      # If this revision is a prefix of the branch number, or possibly is less
1096      # in the minormost number, OR if this branch number is a prefix of the
1097      # revision, then yes.  Otherwise, no.
1098
1099      # So below, we determine if any of those conditions are met.
1100
1101      # Trivial case: is this revision on the branch?  (Compare this way to
1102      # avoid regexps that screw up Emacs indentation, argh.)
1103      if ( substr($revision, 0, (length($branch_number) + 1))
1104           eq
1105           ($branch_number . ".") ) {
1106        if ( $followsub ) {
1107          return 1;
1108#        } elsif ( length($revision) == length($branch_number)+2 ) {
1109        } elsif ( substr($revision, length($branch_number)+1) =~ /^\d+$/ ) {
1110          return 1;
1111        }
1112      } elsif ( length($branch_number) > length($revision)
1113                and
1114                ! $No_Ancestors ) {
1115        # Non-trivial case: check if rev is ancestral to branch
1116
1117        # r_left still has the trailing "."
1118        my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
1119
1120        # b_left still has trailing "."
1121        # b_mid has no trailing "."
1122        my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
1123        return 1
1124          if $r_left eq $b_left and $r_end <= $b_mid;
1125      }
1126    }
1127  }
1128
1129  return;
1130}
1131
1132# -------------------------------------
1133
1134sub output_changelog {
1135my $self = shift; my $class = ref $self;
1136  my ($grand_poobah) = @_;
1137  ### Process each ChangeLog
1138
1139  while (my ($dir,$authorhash) = each %$grand_poobah)
1140  {
1141    &main::debug ("DOING DIR: $dir\n");
1142
1143    # Here we twist our hash around, from being
1144    #   author => time => message => filelist
1145    # in %$authorhash to
1146    #   time => author => message => filelist
1147    # in %changelog.
1148    #
1149    # This is also where we merge entries.  The algorithm proceeds
1150    # through the timeline of the changelog with a sliding window of
1151    # $Max_Checkin_Duration seconds; within that window, entries that
1152    # have the same log message are merged.
1153    #
1154    # (To save space, we zap %$authorhash after we've copied
1155    # everything out of it.)
1156
1157    my %changelog;
1158    while (my ($author,$timehash) = each %$authorhash)
1159    {
1160      my %stamptime;
1161      foreach my $time (sort {$a <=> $b} (keys %$timehash))
1162      {
1163        my $msghash = $timehash->{$time};
1164        while (my ($msg,$qunklist) = each %$msghash)
1165        {
1166          my $stamptime = $stamptime{$msg};
1167          if ((defined $stamptime)
1168              and (($time - $stamptime) < $Max_Checkin_Duration)
1169              and (defined $changelog{$stamptime}{$author}{$msg}))
1170          {
1171            push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files);
1172          }
1173          else {
1174            $changelog{$time}{$author}{$msg} = $qunklist->files;
1175            $stamptime{$msg} = $time;
1176          }
1177        }
1178      }
1179    }
1180    undef (%$authorhash);
1181
1182    ### Now we can write out the ChangeLog!
1183
1184    my ($logfile_here, $logfile_bak, $tmpfile);
1185    my $lastdate;
1186
1187    if (! $Output_To_Stdout) {
1188      $logfile_here =  $dir . $Log_File_Name;
1189      $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
1190      $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
1191      $logfile_bak  = "${logfile_here}.bak";
1192
1193      open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
1194    }
1195    else {
1196      open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
1197    }
1198
1199    print LOG_OUT $ChangeLog_Header;
1200
1201    my %tag_date_printed;
1202
1203    $self->output_header(\*LOG_OUT);
1204
1205    my @key_list = ();
1206    if($Chronological_Order) {
1207        @key_list = sort {$a <=> $b} (keys %changelog);
1208    } else {
1209        @key_list = sort {$b <=> $a} (keys %changelog);
1210    }
1211    foreach my $time (@key_list)
1212    {
1213      next if ($Delta_Mode &&
1214               (($time <= $Delta_StartTime) ||
1215                ($time > $Delta_EndTime && $Delta_EndTime)));
1216
1217      # Set up the date/author line.
1218      # kff todo: do some more XML munging here, on the header
1219      # part of the entry:
1220      my (undef,$min,$hour,$mday,$mon,$year,$wday)
1221          = $UTC_Times ? gmtime($time) : localtime($time);
1222
1223      $wday = $self->wday($wday);
1224      # XML output includes everything else, we might as well make
1225      # it always include Day Of Week too, for consistency.
1226      my $authorhash = $changelog{$time};
1227      if ( $Show_Tag_Dates || $XML_Output ) {
1228        my %tags;
1229        while (my ($author,$mesghash) = each %$authorhash) {
1230          while (my ($msg,$qunk) = each %$mesghash) {
1231            for my $qunkref2 (@$qunk) {
1232              if (defined ($qunkref2->tags)) {
1233                for my $tag (@{$qunkref2->tags}) {
1234                  $tags{$tag} = 1;
1235                }
1236              }
1237            }
1238          }
1239        }
1240        # Sort here for determinism to ease testing
1241        foreach my $tag (sort keys %tags) {
1242          if ( ! defined $tag_date_printed{$tag} ) {
1243            $tag_date_printed{$tag} = $time;
1244            $self->output_tagdate(\*LOG_OUT, $time, $tag);
1245          }
1246        }
1247      }
1248      while (my ($author,$mesghash) = each %$authorhash)
1249      {
1250        # If XML, escape in outer loop to avoid compound quoting:
1251        $author = $self->escape($author);
1252
1253      FOOBIE:
1254        # We sort here to enable predictable ordering for the testing porpoises
1255        for my $msg (sort keys %$mesghash)
1256        {
1257          my $qunklist = $mesghash->{$msg};
1258
1259          my @qunklist =
1260            grep $self->_revision_is_wanted($_), @$qunklist;
1261
1262          next FOOBIE unless @qunklist;
1263
1264          my $files               = $self->pretty_file_list(\@qunklist);
1265          my $header_line;          # date and author
1266          my $wholething;           # $header_line + $body
1267
1268          my $date = $self->fdatetime($time);
1269          $header_line = $self->header_line($time, $author, $lastdate);
1270          $lastdate = $date;
1271
1272          $Text::Wrap::huge = 'overflow'
1273            if $Text::Wrap::VERSION >= 2001.0130;
1274          # Reshape the body according to user preferences.
1275          my $body = $self->format_body($msg, $files, \@qunklist);
1276
1277          $body =~ s/[ \t]+\n/\n/g;
1278          $wholething = $header_line . $body;
1279
1280          # One last check: make sure it passes the regexp test, if the
1281          # user asked for that.  We have to do it here, so that the
1282          # test can match against information in the header as well
1283          # as in the text of the log message.
1284
1285          # How annoying to duplicate so much code just because I
1286          # can't figure out a way to evaluate scalars on the trailing
1287          # operator portion of a regular expression.  Grrr.
1288          if ($Case_Insensitive) {
1289            unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) {
1290              $self->output_entry(\*LOG_OUT, $wholething);
1291            }
1292          }
1293          else {
1294            unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) {
1295              $self->output_entry(\*LOG_OUT, $wholething);
1296            }
1297          }
1298        }
1299      }
1300    }
1301
1302    $self->output_footer(\*LOG_OUT);
1303
1304    close (LOG_OUT);
1305
1306    if ( ! $Output_To_Stdout ) {
1307      # If accumulating, append old data to new before renaming.  But
1308      # don't append the most recent entry, since it's already in the
1309      # new log due to CVS's idiosyncratic interpretation of "log -d".
1310      if ($Cumulative && -f $logfile_here) {
1311        open NEW_LOG, ">>$tmpfile"
1312          or die "trouble appending to $tmpfile ($!)";
1313
1314        open OLD_LOG, "<$logfile_here"
1315          or die "trouble reading from $logfile_here ($!)";
1316
1317        my $started_first_entry = 0;
1318        my $passed_first_entry = 0;
1319        while (<OLD_LOG>) {
1320          if ( ! $passed_first_entry ) {
1321            if ( ( ! $started_first_entry )
1322                and /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1323              $started_first_entry = 1;
1324            } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1325              $passed_first_entry = 1;
1326              print NEW_LOG $_;
1327            }
1328          } else {
1329            print NEW_LOG $_;
1330          }
1331        }
1332
1333        close NEW_LOG;
1334        close OLD_LOG;
1335      }
1336
1337      if ( -f $logfile_here ) {
1338        rename $logfile_here, $logfile_bak;
1339      }
1340      rename $tmpfile, $logfile_here;
1341    }
1342  }
1343}
1344
1345# -------------------------------------
1346
1347# Don't call this wrap, because with 5.5.3, that clashes with the
1348# (unconditional :-( ) export of wrap() from Text::Wrap
1349sub mywrap {
1350  my $self = shift;
1351  my ($indent1, $indent2, @text) = @_;
1352  # If incoming text looks preformatted, don't get clever
1353  my $text = Text::Wrap::wrap($indent1, $indent2, @text);
1354  if ( grep /^\s+/m, @text ) {
1355    return $text;
1356  }
1357  my @lines = split /\n/, $text;
1358  $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
1359  $lines[0] =~ s/^$indent1\s+/$indent1/;
1360  s/^$indent2\s+/$indent2/
1361    for @lines[1..$#lines];
1362  my $newtext = join "\n", @lines;
1363  $newtext .= "\n"
1364    if substr($text, -1) eq "\n";
1365  return $newtext;
1366}
1367
1368# -------------------------------------
1369
1370sub preprocess_msg_text {
1371  my $self = shift;
1372  my ($text) = @_;
1373
1374  # Strip out carriage returns (as they probably result from DOSsy editors).
1375  $text =~ s/\r\n/\n/g;
1376  # If it *looks* like two newlines, make it *be* two newlines:
1377  $text =~ s/\n\s*\n/\n\n/g;
1378
1379  return $text;
1380}
1381
1382# -------------------------------------
1383
1384sub last_line_len {
1385  my $self = shift;
1386
1387  my $files_list = shift;
1388  my @lines = split (/\n/, $files_list);
1389  my $last_line = pop (@lines);
1390  return length ($last_line);
1391}
1392
1393# -------------------------------------
1394
1395# A custom wrap function, sensitive to some common constructs used in
1396# log entries.
1397sub wrap_log_entry {
1398  my $self = shift;
1399
1400  my $text = shift;                  # The text to wrap.
1401  my $left_pad_str = shift;          # String to pad with on the left.
1402
1403  # These do NOT take left_pad_str into account:
1404  my $length_remaining = shift;      # Amount left on current line.
1405  my $max_line_length  = shift;      # Amount left for a blank line.
1406
1407  my $wrapped_text = '';             # The accumulating wrapped entry.
1408  my $user_indent = '';              # Inherited user_indent from prev line.
1409
1410  my $first_time = 1;                # First iteration of the loop?
1411  my $suppress_line_start_match = 0; # Set to disable line start checks.
1412
1413  my @lines = split (/\n/, $text);
1414  while (@lines)   # Don't use `foreach' here, it won't work.
1415  {
1416    my $this_line = shift (@lines);
1417    chomp $this_line;
1418
1419    if ($this_line =~ /^(\s+)/) {
1420      $user_indent = $1;
1421    }
1422    else {
1423      $user_indent = '';
1424    }
1425
1426    # If it matches any of the line-start regexps, print a newline now...
1427    if ($suppress_line_start_match)
1428    {
1429      $suppress_line_start_match = 0;
1430    }
1431    elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1432           || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1433           || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1434           || ($this_line =~ /^(\s+)(\S+)/)
1435           || ($this_line =~ /^(\s*)- +/)
1436           || ($this_line =~ /^()\s*$/)
1437           || ($this_line =~ /^(\s*)\*\) +/)
1438           || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1439    {
1440      # Make a line break immediately, unless header separator is set
1441      # and this line is the first line in the entry, in which case
1442      # we're getting the blank line for free already and shouldn't
1443      # add an extra one.
1444      unless (($After_Header ne " ") and ($first_time))
1445      {
1446        if ($this_line =~ /^()\s*$/) {
1447          $suppress_line_start_match = 1;
1448          $wrapped_text .= "\n${left_pad_str}";
1449        }
1450
1451        $wrapped_text .= "\n${left_pad_str}";
1452      }
1453
1454      $length_remaining = $max_line_length - (length ($user_indent));
1455    }
1456
1457    # Now that any user_indent has been preserved, strip off leading
1458    # whitespace, so up-folding has no ugly side-effects.
1459    $this_line =~ s/^\s*//;
1460
1461    # Accumulate the line, and adjust parameters for next line.
1462    my $this_len = length ($this_line);
1463    if ($this_len == 0)
1464    {
1465      # Blank lines should cancel any user_indent level.
1466      $user_indent = '';
1467      $length_remaining = $max_line_length;
1468    }
1469    elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1470    {
1471      # Walk backwards from the end.  At first acceptable spot, break
1472      # a new line.
1473      my $idx = $length_remaining - 1;
1474      if ($idx < 0) { $idx = 0 };
1475      while ($idx > 0)
1476      {
1477        if (substr ($this_line, $idx, 1) =~ /\s/)
1478        {
1479          my $line_now = substr ($this_line, 0, $idx);
1480          my $next_line = substr ($this_line, $idx);
1481          $this_line = $line_now;
1482
1483          # Clean whitespace off the end.
1484          chomp $this_line;
1485
1486          # The current line is ready to be printed.
1487          $this_line .= "\n${left_pad_str}";
1488
1489          # Make sure the next line is allowed full room.
1490          $length_remaining = $max_line_length - (length ($user_indent));
1491
1492          # Strip next_line, but then preserve any user_indent.
1493          $next_line =~ s/^\s*//;
1494
1495          # Sneak a peek at the user_indent of the upcoming line, so
1496          # $next_line (which will now precede it) can inherit that
1497          # indent level.  Otherwise, use whatever user_indent level
1498          # we currently have, which might be none.
1499          my $next_next_line = shift (@lines);
1500          if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1501            $next_line = $1 . $next_line if (defined ($1));
1502            # $length_remaining = $max_line_length - (length ($1));
1503            $next_next_line =~ s/^\s*//;
1504          }
1505          else {
1506            $next_line = $user_indent . $next_line;
1507          }
1508          if (defined ($next_next_line)) {
1509            unshift (@lines, $next_next_line);
1510          }
1511          unshift (@lines, $next_line);
1512
1513          # Our new next line might, coincidentally, begin with one of
1514          # the line-start regexps, so we temporarily turn off
1515          # sensitivity to that until we're past the line.
1516          $suppress_line_start_match = 1;
1517
1518          last;
1519        }
1520        else
1521        {
1522          $idx--;
1523        }
1524      }
1525
1526      if ($idx == 0)
1527      {
1528        # We bottomed out because the line is longer than the
1529        # available space.  But that could be because the space is
1530        # small, or because the line is longer than even the maximum
1531        # possible space.  Handle both cases below.
1532
1533        if ($length_remaining == ($max_line_length - (length ($user_indent))))
1534        {
1535          # The line is simply too long -- there is no hope of ever
1536          # breaking it nicely, so just insert it verbatim, with
1537          # appropriate padding.
1538          $this_line = "\n${left_pad_str}${this_line}";
1539        }
1540        else
1541        {
1542          # Can't break it here, but may be able to on the next round...
1543          unshift (@lines, $this_line);
1544          $length_remaining = $max_line_length - (length ($user_indent));
1545          $this_line = "\n${left_pad_str}";
1546        }
1547      }
1548    }
1549    else  # $this_len < $length_remaining, so tack on what we can.
1550    {
1551      # Leave a note for the next iteration.
1552      $length_remaining = $length_remaining - $this_len;
1553
1554      if ($this_line =~ /\.$/)
1555      {
1556        $this_line .= "  ";
1557        $length_remaining -= 2;
1558      }
1559      else  # not a sentence end
1560      {
1561        $this_line .= " ";
1562        $length_remaining -= 1;
1563      }
1564    }
1565
1566    # Unconditionally indicate that loop has run at least once.
1567    $first_time = 0;
1568
1569    $wrapped_text .= "${user_indent}${this_line}";
1570  }
1571
1572  # One last bit of padding.
1573  $wrapped_text .= "\n";
1574
1575  return $wrapped_text;
1576}
1577
1578# -------------------------------------
1579
1580sub _pretty_file_list {
1581  my $self = shift;
1582
1583  my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;
1584
1585  my @qunkrefs =
1586    grep +( ( ! $_->tags_exists
1587              or
1588              ! grep exists $ignore_tags{$_}, @{$_->tags})
1589            and
1590            ( ! keys %show_tags
1591              or
1592              ( $_->tags_exists
1593                and
1594                grep exists $show_tags{$_}, @{$_->tags} )
1595            )
1596          ),
1597    @$qunksref;
1598
1599  my $common_dir;           # Dir prefix common to all files ('' if none)
1600
1601  # First, loop over the qunks gathering all the tag/branch names.
1602  # We'll put them all in non_unanimous_tags, and take out the
1603  # unanimous ones later.
1604 QUNKREF:
1605  foreach my $qunkref (@qunkrefs)
1606  {
1607    # Keep track of whether all the files in this commit were in the
1608    # same directory, and memorize it if so.  We can make the output a
1609    # little more compact by mentioning the directory only once.
1610    if ($Common_Dir && (scalar (@qunkrefs)) > 1)
1611    {
1612      if (! (defined ($common_dir)))
1613      {
1614        my ($base, $dir);
1615        ($base, $dir, undef) = fileparse ($qunkref->filename);
1616
1617        if ((! (defined ($dir)))  # this first case is sheer paranoia
1618            or ($dir eq '')
1619            or ($dir eq "./")
1620            or ($dir eq ".\\"))
1621        {
1622          $common_dir = '';
1623        }
1624        else
1625        {
1626          $common_dir = $dir;
1627        }
1628      }
1629      elsif ($common_dir ne '')
1630      {
1631        # Already have a common dir prefix, so how much of it can we preserve?
1632        $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir);
1633      }
1634    }
1635    else  # only one file in this entry anyway, so common dir not an issue
1636    {
1637      $common_dir = '';
1638    }
1639
1640    if (defined ($qunkref->branch)) {
1641      $all_branches->{$qunkref->branch} = 1;
1642    }
1643    if (defined ($qunkref->tags)) {
1644      foreach my $tag (@{$qunkref->tags}) {
1645        $non_unanimous_tags->{$tag} = 1;
1646      }
1647    }
1648  }
1649
1650  # Any tag held by all qunks will be printed specially... but only if
1651  # there are multiple qunks in the first place!
1652  if ((scalar (@qunkrefs)) > 1) {
1653    foreach my $tag (keys (%$non_unanimous_tags)) {
1654      my $everyone_has_this_tag = 1;
1655      foreach my $qunkref (@qunkrefs) {
1656        if ((! (defined ($qunkref->tags)))
1657            or (! (grep ($_ eq $tag, @{$qunkref->tags})))) {
1658          $everyone_has_this_tag = 0;
1659        }
1660      }
1661      if ($everyone_has_this_tag) {
1662        $unanimous_tags->{$tag} = 1;
1663        delete $non_unanimous_tags->{$tag};
1664      }
1665    }
1666  }
1667
1668  return $common_dir, \@qunkrefs;
1669}
1670
1671# -------------------------------------
1672
1673sub fdatetime {
1674  my $self = shift;
1675
1676  my ($year, $mday, $mon, $wday, $hour, $min);
1677
1678  if ( @_ > 1 ) {
1679    ($year, $mday, $mon, $wday, $hour, $min) = @_;
1680  } else {
1681    my ($time) = @_;
1682    (undef, $min, $hour, $mday, $mon, $year, $wday) =
1683      $UTC_Times ? gmtime($time) : localtime($time);
1684
1685    $year += 1900;
1686    $mon  += 1;
1687    $wday  = $self->wday($wday);
1688  }
1689
1690  my $fdate = $self->fdate($year, $mon, $mday, $wday);
1691
1692  if ($Show_Times) {
1693    my $ftime = $self->ftime($hour, $min);
1694    return "$fdate $ftime";
1695  } else {
1696    return $fdate;
1697  }
1698}
1699
1700# -------------------------------------
1701
1702sub fdate {
1703  my $self = shift;
1704
1705  my ($year, $mday, $mon, $wday);
1706
1707  if ( @_ > 1 ) {
1708    ($year, $mon, $mday, $wday) = @_;
1709  } else {
1710    my ($time) = @_;
1711    (undef, undef, undef, $mday, $mon, $year, $wday) =
1712      $UTC_Times ? gmtime($time) : localtime($time);
1713
1714    $year += 1900;
1715    $mon  += 1;
1716    $wday  = $self->wday($wday);
1717  }
1718
1719  return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
1720}
1721
1722# -------------------------------------
1723
1724sub ftime {
1725  my $self = shift;
1726
1727  my ($hour, $min);
1728
1729  if ( @_ > 1 ) {
1730    ($hour, $min) = @_;
1731  } else {
1732    my ($time) = @_;
1733    (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time);
1734  }
1735
1736  return sprintf '%02u:%02u', $hour, $min;
1737}
1738
1739# ----------------------------------------------------------------------------
1740
1741package CVS::Utils::ChangeLog::Message;
1742
1743sub new {
1744  my $class = shift;
1745  my ($msg) = @_;
1746
1747  my %self = (msg => $msg, files => []);
1748
1749  bless \%self, $class;
1750}
1751
1752sub add_fileentry {
1753  my $self = shift;
1754  my ($fileentry) = @_;
1755
1756  die "Not a fileentry: $fileentry"
1757    unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry');
1758
1759  push @{$self->{files}}, $fileentry;
1760}
1761
1762sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} }
1763
1764# ----------------------------------------------------------------------------
1765
1766package CVS::Utils::ChangeLog::FileEntry;
1767
1768use File::Basename qw( fileparse );
1769
1770# Each revision of a file has a little data structure (a `qunk')
1771# associated with it.  That data structure holds not only the
1772# file's name, but any additional information about the file
1773# that might be needed in the output, such as the revision
1774# number, tags, branches, etc.  The reason to have these things
1775# arranged in a data structure, instead of just appending them
1776# textually to the file's name, is that we may want to do a
1777# little rearranging later as we write the output.  For example,
1778# all the files on a given tag/branch will go together, followed
1779# by the tag in parentheses (so trunk or otherwise non-tagged
1780# files would go at the end of the file list for a given log
1781# message).  This rearrangement is a lot easier to do if we
1782# don't have to reparse the text.
1783#
1784# A qunk looks like this:
1785#
1786#   {
1787#     filename    =>    "hello.c",
1788#     revision    =>    "1.4.3.2",
1789#     time        =>    a timegm() return value (moment of commit)
1790#     tags        =>    [ "tag1", "tag2", ... ],
1791#     branch      =>    "branchname" # There should be only one, right?
1792#     roots       =>    [ "branchtag1", "branchtag2", ... ]
1793#     lines       =>    "+x -y" # or undefined; x and y are integers
1794#   }
1795
1796# Single top-level ChangeLog, or one per subdirectory?
1797my $distributed;
1798sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }
1799
1800sub new {
1801  my $class = shift;
1802  my ($path, $time, $revision, $state, $lines,
1803      $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_;
1804
1805  my %self = (time     => $time,
1806              revision => $revision,
1807              state    => $state,
1808              lines    => $lines,
1809              branch_numbers => $branch_numbers,
1810             );
1811
1812  if ( $distributed ) {
1813    @self{qw(filename dir_key)} = fileparse($path);
1814  } else {
1815    @self{qw(filename dir_key)} = ($path, './');
1816  }
1817
1818  { # Scope for $branch_prefix
1819    (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/));
1820    $branch_prefix =~ s/\.$//;
1821    if ( $branch_names->{$branch_prefix} ) {
1822      my $branch_name = $branch_names->{$branch_prefix};
1823      $self{branch}   = $branch_name;
1824      $self{branches} = [$branch_name];
1825    }
1826    while ( $branch_prefix =~ s/^(\d+(?:\.\d+\.\d+)+)\.\d+\.\d+$/$1/ ) {
1827      push @{$self{branches}}, $branch_names->{$branch_prefix}
1828        if exists $branch_names->{$branch_prefix};
1829    }
1830  }
1831
1832  # If there's anything in the @branch_roots array, then this
1833  # revision is the root of at least one branch.  We'll display
1834  # them as branch names instead of revision numbers, the
1835  # substitution for which is done directly in the array:
1836  $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
1837    if @$branch_roots;
1838
1839  if ( exists $symbolic_names->{$revision} ) {
1840    $self{tags} = delete $symbolic_names->{$revision};
1841    &main::delta_check($time, $self{tags});
1842  }
1843
1844  bless \%self, $class;
1845}
1846
1847sub filename       { $_[0]->{filename}       }
1848sub dir_key        { $_[0]->{dir_key}        }
1849sub revision       { $_[0]->{revision}       }
1850sub branch         { $_[0]->{branch}         }
1851sub state          { $_[0]->{state}          }
1852sub lines          { $_[0]->{lines}          }
1853sub roots          { $_[0]->{roots}          }
1854sub branch_numbers { $_[0]->{branch_numbers} }
1855
1856sub tags        { $_[0]->{tags}     }
1857sub tags_exists {
1858  exists $_[0]->{tags};
1859}
1860
1861# This may someday be used in a more sophisticated calculation of what other
1862# files are involved in this commit.  For now, we don't use it much except for
1863# delta mode, because the common-commit-detection algorithm is hypothesized to
1864# be "good enough" as it stands.
1865sub time     { $_[0]->{time}     }
1866
1867# ----------------------------------------------------------------------------
1868
1869package CVS::Utils::ChangeLog::EntrySetBuilder;
1870
1871use File::Basename qw( fileparse );
1872use Time::Local    qw( timegm );
1873
1874use constant MAILNAME => "/etc/mailname";
1875
1876# In 'cvs log' output, one long unbroken line of equal signs separates files:
1877use constant FILE_SEPARATOR => '=' x 77;# . "\n";
1878# In 'cvs log' output, a shorter line of dashes separates log messages within
1879# a file:
1880use constant REV_SEPARATOR  => '-' x 28;# . "\n";
1881
1882use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
1883
1884# -------------------------------------
1885
1886sub new {
1887  my ($proto) = @_;
1888  my $class = ref $proto || $proto;
1889
1890  my $poobah  = CVS::Utils::ChangeLog::EntrySet->new;
1891  my $self = bless +{ grand_poobah => $poobah }, $class;
1892
1893  $self->clear_file;
1894  $self->maybe_read_user_map_file;
1895  return $self;
1896}
1897
1898# -------------------------------------
1899
1900sub clear_msg {
1901  my ($self) = @_;
1902
1903  # Make way for the next message
1904  undef $self->{rev_msg};
1905  undef $self->{rev_time};
1906  undef $self->{rev_revision};
1907  undef $self->{rev_author};
1908  undef $self->{rev_state};
1909  undef $self->{lines};
1910  $self->{rev_branch_roots} = [];       # For showing which files are branch
1911                                        # ancestors.
1912  $self->{collecting_symbolic_names} = 0;
1913}
1914
1915# -------------------------------------
1916
1917sub clear_file {
1918  my ($self) = @_;
1919  $self->clear_msg;
1920
1921  undef $self->{filename};
1922  $self->{branch_names}   = +{};        # We'll grab branch names while we're
1923                                        # at it.
1924  $self->{branch_numbers} = +{};        # Save some revisions for
1925                                        # @Follow_Branches
1926  $self->{symbolic_names} = +{};        # Where tag names get stored.
1927}
1928
1929# -------------------------------------
1930
1931sub grand_poobah { $_[0]->{grand_poobah} }
1932
1933# -------------------------------------
1934
1935sub read_changelog {
1936  my ($self, $command) = @_;
1937
1938  local (*READER, *WRITER);
1939  my $pid;
1940  if (! $Input_From_Stdin) {
1941    pipe(READER, WRITER)
1942      or die "Couldn't form pipe: $!\n";
1943    $pid = fork;
1944    die "Couldn't fork: $!\n"
1945      if ! defined $pid;
1946    if ( ! $pid ) { # child
1947      open STDOUT, '>&=' . fileno WRITER
1948        or die "Couldn't dup stderr to ", fileno WRITER, "\n";
1949      # strangely, some perls give spurious warnings about STDIN being opened
1950      # for output only these close calls precede the STDOUT reopen above.
1951      # I think they must be reusing fd 1.
1952      close READER;
1953      close STDIN;
1954
1955      exec @$command;
1956    }
1957
1958    close WRITER;
1959
1960    &main::debug ("(run \"@$command\")\n");
1961  }
1962  else {
1963    open READER, '-' or die "unable to open stdin for reading";
1964  }
1965
1966  binmode READER;
1967
1968 XX_Log_Source:
1969  while (<READER>) {
1970    chomp;
1971    s!\r$!!;
1972
1973    # If on a new file and don't see filename, skip until we find it, and
1974    # when we find it, grab it.
1975    if ( ! defined $self->{filename} ) {
1976      $self->read_file_path($_);
1977    } elsif ( /^symbolic names:$/ ) {
1978      $self->{collecting_symbolic_names} = 1;
1979    } elsif ( $self->{collecting_symbolic_names} ) {
1980      $self->read_symbolic_name($_);
1981    } elsif ( $_ eq FILE_SEPARATOR and ! defined $self->{rev_revision} ) {
1982      $self->clear_file;
1983    } elsif ( ! defined $self->{rev_revision} ) {
1984        # If have file name, but not revision, and see revision, then grab
1985        # it.  (We collect unconditionally, even though we may or may not
1986        # ever use it.)
1987      $self->read_revision($_);
1988    } elsif ( ! defined $self->{rev_time} ) { # and /^date: /) {
1989      $self->read_date_author_and_state($_);
1990    } elsif ( /^branches:\s+(.*);$/ ) {
1991      $self->read_branches($1);
1992    } elsif ( ! ( $_ eq FILE_SEPARATOR or $_ eq REV_SEPARATOR ) ) {
1993      # If have file name, time, and author, then we're just grabbing
1994      # log message texts:
1995      $self->{rev_msg} .= $_ . "\n";   # Normally, just accumulate the message...
1996    } else {
1997      my $noadd = 0;
1998      if ( ! $self->{rev_msg}
1999           or $self->{rev_msg} =~ /^\s*(\.\s*)?$/
2000           or index($self->{rev_msg}, EMPTY_LOG_MESSAGE) > -1 ) {
2001        # ... until a msg separator is encountered:
2002        # Ensure the message contains something:
2003        $self->clear_msg, $noadd = 1
2004          if $Prune_Empty_Msgs;
2005        $self->{rev_msg} = "[no log message]\n";
2006      }
2007
2008      $self->add_file_entry
2009        unless $noadd;
2010
2011      if ( $_ eq FILE_SEPARATOR ) {
2012        $self->clear_file;
2013      } else {
2014        $self->clear_msg;
2015      }
2016    }
2017  }
2018
2019  close READER
2020    or die "Couldn't close pipe reader: $!\n";
2021  if ( defined $pid ) {
2022    my $rv;
2023    waitpid $pid, 0;
2024    0 == $?
2025      or $!=1, die sprintf("Problem reading log input (pid/exit/signal/core: %d/%d/%d/%d)\n",
2026                           $pid, $? >> 8, $? & 127, $? & 128);
2027  }
2028  return;
2029}
2030
2031# -------------------------------------
2032
2033sub add_file_entry {
2034  $_[0]->grand_poobah->add_fileentry(@{$_[0]}{qw(filename rev_time rev_revision
2035                                                 rev_state lines branch_names
2036                                                 rev_branch_roots
2037                                                 branch_numbers
2038                                                 symbolic_names
2039                                                 rev_author rev_msg)});
2040}
2041
2042# -------------------------------------
2043
2044sub maybe_read_user_map_file {
2045  my ($self) = @_;
2046
2047  my %expansions;
2048  my $User_Map_Input;
2049
2050  if ($User_Map_File)
2051  {
2052    if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
2053         !-f $User_Map_File )
2054    {
2055      my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
2056      $User_Map_Input = "$rsh $1 'cat $2' |";
2057      &main::debug ("(run \"${User_Map_Input}\")\n");
2058    }
2059    else
2060    {
2061      $User_Map_Input = "<$User_Map_File";
2062    }
2063
2064    open (MAPFILE, $User_Map_Input)
2065        or die ("Unable to open $User_Map_File ($!)");
2066
2067    while (<MAPFILE>)
2068    {
2069      next if /^\s*#/;  # Skip comment lines.
2070      next if not /:/;  # Skip lines without colons.
2071
2072      # It is now safe to split on ':'.
2073      my ($username, $expansion) = split ':';
2074      chomp $expansion;
2075      $expansion =~ s/^'(.*)'$/$1/;
2076      $expansion =~ s/^"(.*)"$/$1/;
2077
2078      # If it looks like the expansion has a real name already, then
2079      # we toss the username we got from CVS log.  Otherwise, keep
2080      # it to use in combination with the email address.
2081
2082      if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
2083        # Also, add angle brackets if none present
2084        if (! ($expansion =~ /<\S+@\S+>/)) {
2085          $expansions{$username} = "$username <$expansion>";
2086        }
2087        else {
2088          $expansions{$username} = "$username $expansion";
2089        }
2090      }
2091      else {
2092        $expansions{$username} = $expansion;
2093      }
2094    } # fi ($User_Map_File)
2095
2096    close (MAPFILE);
2097  }
2098
2099  if (defined $User_Passwd_File)
2100  {
2101    if ( ! defined $Domain ) {
2102      if ( -e MAILNAME ) {
2103        chomp($Domain = slurp_file(MAILNAME));
2104      } else {
2105      MAILDOMAIN_CMD:
2106        for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
2107          my ($text, $exit, $sig, $core) = run_ext($_);
2108          if ( $exit == 0 && $sig == 0 && $core == 0 ) {
2109            chomp $text;
2110            if ( length $text ) {
2111              $Domain = $text;
2112              last MAILDOMAIN_CMD;
2113            }
2114          }
2115        }
2116      }
2117    }
2118
2119    die "No mail domain found\n"
2120      unless defined $Domain;
2121
2122    open (MAPFILE, "<$User_Passwd_File")
2123        or die ("Unable to open $User_Passwd_File ($!)");
2124    while (<MAPFILE>)
2125    {
2126      # all lines are valid
2127      my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
2128      my $expansion = '';
2129      ($expansion) = split (',', $gecos)
2130        if defined $gecos && length $gecos;
2131
2132      my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
2133      $expansions{$username} = "$expansion <$mailname>";
2134    }
2135    close (MAPFILE);
2136  }
2137
2138 $self->{usermap} = \%expansions;
2139}
2140
2141# -------------------------------------
2142
2143sub read_file_path {
2144  my ($self, $line) = @_;
2145
2146  my $path;
2147
2148  if ( $line =~ /^Working file: (.*)/ ) {
2149    $path = $1;
2150  } elsif ( defined $RCS_Root
2151            and
2152            $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
2153    $path = $1;
2154    $path =~ s!Attic/!!;
2155  } else {
2156    return;
2157  }
2158
2159  if ( @Ignore_Files ) {
2160    my $base;
2161    ($base, undef, undef) = fileparse($path);
2162
2163    my $xpath = $Case_Insensitive ? lc($path) : $path;
2164    return
2165      if grep $path =~ /$_/, @Ignore_Files;
2166  }
2167
2168  $self->{filename} = $path;
2169  return;
2170}
2171
2172# -------------------------------------
2173
2174sub read_symbolic_name {
2175  my ($self, $line) = @_;
2176
2177  # All tag names are listed with whitespace in front in cvs log
2178  # output; so if see non-whitespace, then we're done collecting.
2179  if ( /^\S/ ) {
2180    $self->{collecting_symbolic_names} = 0;
2181    return;
2182  } else {
2183    # we're looking at a tag name, so parse & store it
2184
2185    # According to the Cederqvist manual, in node "Tags", tag names must start
2186    # with an uppercase or lowercase letter and can contain uppercase and
2187    # lowercase letters, digits, `-', and `_'.  However, it's not our place to
2188    # enforce that, so we'll allow anything CVS hands us to be a tag:
2189    my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);
2190
2191    # A branch number either has an odd number of digit sections
2192    # (and hence an even number of dots), or has ".0." as the
2193    # second-to-last digit section.  Test for these conditions.
2194    my $real_branch_rev = '';
2195    if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/             # Even number of dots...
2196         and
2197         $tag_rev !~ /^(1\.)+1$/ ) {                  # ...but not "1.[1.]1"
2198      $real_branch_rev = $tag_rev;
2199    } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) {  # Has ".0."
2200      $real_branch_rev = $1 . $3;
2201    }
2202
2203    # If we got a branch, record its number.
2204    if ( $real_branch_rev ) {
2205      $self->{branch_names}->{$real_branch_rev} = $tag_name;
2206      $self->{branch_numbers}->{$tag_name} = $real_branch_rev;
2207    } else {
2208      # Else it's just a regular (non-branch) tag.
2209      push @{$self->{symbolic_names}->{$tag_rev}}, $tag_name;
2210    }
2211  }
2212
2213  $self->{collecting_symbolic_names} = 1;
2214  return;
2215}
2216
2217# -------------------------------------
2218
2219sub read_revision {
2220  my ($self, $line) = @_;
2221
2222  my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );
2223
2224  return
2225    unless $revision;
2226
2227  $self->{rev_revision} = $revision;
2228  return;
2229}
2230
2231# -------------------------------------
2232
2233{ # Closure over %gecos_warned
2234my %gecos_warned;
2235sub read_date_author_and_state {
2236  my ($self, $line) = @_;
2237
2238  my ($time, $author, $state) = $self->parse_date_author_and_state($line);
2239
2240  if ( defined($self->{usermap}->{$author}) and $self->{usermap}->{$author} ) {
2241    $author = $self->{usermap}->{$author};
2242  } elsif ( defined $Domain or $Gecos == 1 ) {
2243    my $email = $author;
2244    $email = $author."@".$Domain
2245      if defined $Domain && $Domain ne '';
2246
2247    my $pw = getpwnam($author);
2248    my ($fullname, $office, $workphone, $homephone, $gcos);
2249    if ( defined $pw ) {
2250      $gcos = (getpwnam($author))[6];
2251      ($fullname, $office, $workphone, $homephone) =
2252        split /\s*,\s*/, $gcos;
2253    } else {
2254      warn "Couldn't find gecos info for author '$author'\n"
2255        unless $gecos_warned{$author}++;
2256      $fullname = '';
2257    }
2258    for (grep defined, $fullname, $office, $workphone, $homephone) {
2259      s/&/ucfirst(lc($pw->name))/ge;
2260    }
2261    $author = $fullname . "  <" . $email . ">"
2262      if $fullname ne '';
2263  }
2264
2265  $self->{rev_state}  = $state;
2266  $self->{rev_time}   = $time;
2267  $self->{rev_author} = $author;
2268  return;
2269}
2270}
2271
2272# -------------------------------------
2273
2274sub read_branches {
2275  # A "branches: ..." line here indicates that one or more branches
2276  # are rooted at this revision.  If we're showing branches, then we
2277  # want to show that fact as well, so we collect all the branches
2278  # that this is the latest ancestor of and store them in
2279  # $self->[rev_branch_roots}.  Just for reference, the format of the
2280  # line we're seeing at this point is:
2281  #
2282  #    branches:  1.5.2;  1.5.4;  ...;
2283  #
2284  # Okay, here goes:
2285  my ($self, $line) = @_;
2286
2287  # Ugh.  This really bothers me.  Suppose we see a log entry
2288  # like this:
2289  #
2290  #    ----------------------------
2291  #    revision 1.1
2292  #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
2293  #    branches:  1.1.2;
2294  #    Intended first line of log message begins here.
2295  #    ----------------------------
2296  #
2297  # The question is, how we can tell the difference between that
2298  # log message and a *two*-line log message whose first line is
2299  #
2300  #    "branches:  1.1.2;"
2301  #
2302  # See the problem?  The output of "cvs log" is inherently
2303  # ambiguous.
2304  #
2305  # For now, we punt: we liberally assume that people don't
2306  # write log messages like that, and just toss a "branches:"
2307  # line if we see it but are not showing branches.  I hope no
2308  # one ever loses real log data because of this.
2309  if ( $Show_Branches ) {
2310    $line =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1
2311    $self->{rev_branch_roots} = [split /;\s+/, $line]
2312      if length $line;
2313  }
2314}
2315
2316# -------------------------------------
2317
2318sub parse_date_author_and_state {
2319  my ($self, $line) = @_;
2320  # Parses the date/time and author out of a line like:
2321  #
2322  # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
2323  #
2324  # or, in CVS 1.12.9:
2325  #
2326  # date: 2004-06-05 16:10:32 +0000; author: somebody; state: Exp;
2327
2328  my ($year, $mon, $mday, $hours, $min, $secs, $utcOffset, $author, $state, $rest) =
2329    $line =~
2330      m!(\d+)[-/](\d+)[-/](\d+)\s+(\d+):(\d+):(\d+)(\s+[+-]\d{4})?;\s+
2331        author:\s+([^;]+);\s+state:\s+([^;]+);(.*)!x
2332    or  die "Couldn't parse date ``$line''";
2333  die "Bad date or Y2K issues"
2334    unless $year > 1969 and $year < 2258;
2335  # Kinda arbitrary, but useful as a sanity check
2336  my $time = timegm($secs, $min, $hours, $mday, $mon-1, $year-1900);
2337  if ( defined $utcOffset ) {
2338    my ($plusminus, $hour, $minute) = ($utcOffset =~ m/([+-])(\d\d)(\d\d)/);
2339    my $offset = (($hour * 60) + $minute) * 60 * ($plusminus eq '+' ? -1 : 1);
2340    $time += $offset;
2341  }
2342  if ( $rest =~ m!\s+lines:\s+(.*)! ) {
2343    $self->{lines} = $1;
2344  }
2345
2346  return $time, $author, $state;
2347}
2348
2349# Subrs ----------------------------------------------------------------------
2350
2351package main;
2352
2353sub delta_check {
2354  my ($time, $tags) = @_;
2355
2356  # If we're in 'delta' mode, update the latest observed times for the
2357  # beginning and ending tags, and when we get around to printing output, we
2358  # will simply restrict ourselves to that timeframe...
2359  return
2360    unless $Delta_Mode;
2361
2362  $Delta_StartTime = $time
2363    if $time > $Delta_StartTime and grep { $_ eq $Delta_From } @$tags;
2364
2365  $Delta_EndTime = $time
2366    if $time > $Delta_EndTime and grep { $_ eq $Delta_To } @$tags;
2367}
2368
2369sub run_ext {
2370  my ($cmd) = @_;
2371  $cmd = [$cmd]
2372    unless ref $cmd;
2373  local $" = ' ';
2374  my $out = qx"@$cmd 2>&1";
2375  my $rv  = $?;
2376  my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
2377  return $out, $exit, $sig, $core;
2378}
2379
2380# -------------------------------------
2381
2382# If accumulating, grab the boundary date from pre-existing ChangeLog.
2383sub maybe_grab_accumulation_date {
2384  if (! $Cumulative || $Update) {
2385    return '';
2386  }
2387
2388  # else
2389
2390  open (LOG, "$Log_File_Name")
2391      or die ("trouble opening $Log_File_Name for reading ($!)");
2392
2393  my $boundary_date;
2394  while (<LOG>)
2395  {
2396    if (/^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/)
2397    {
2398      $boundary_date = "$1";
2399      last;
2400    }
2401  }
2402
2403  close (LOG);
2404
2405  # convert time from utc to local timezone if the ChangeLog has
2406  # dates/times in utc
2407  if ($UTC_Times && $boundary_date)
2408  {
2409    # convert the utc time to a time value
2410    my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
2411      m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
2412    my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
2413    # print the timevalue in the local timezone
2414    my ($ignore,$wday);
2415    ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
2416    $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
2417                            $year+1900,$mon+1,$mday,$hour,$min);
2418  }
2419
2420  return $boundary_date;
2421}
2422
2423# -------------------------------------
2424
2425# Fills up a ChangeLog structure in the current directory.
2426sub derive_changelog {
2427  my ($command) = @_;
2428
2429  # See "The Plan" above for a full explanation.
2430
2431  # Might be adding to an existing ChangeLog
2432  my $accumulation_date = maybe_grab_accumulation_date;
2433  if ($accumulation_date) {
2434    # Insert -d immediately after 'cvs log'
2435    my $Log_Date_Command = "-d>${accumulation_date}";
2436
2437    my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
2438    splice @$command, $log_index+1, 0, $Log_Date_Command;
2439    &debug ("(adding log msg starting from $accumulation_date)\n");
2440  }
2441
2442#  output_changelog(read_changelog($command));
2443  my $builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
2444  $builder->read_changelog($command);
2445  $builder->grand_poobah->output_changelog;
2446}
2447
2448# -------------------------------------
2449
2450sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
2451
2452# -------------------------------------
2453
2454sub common_path_prefix {
2455  my ($path1, $path2) = @_;
2456
2457  # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
2458  # terms, and mould windoze filenames to match.  Is this really appropriate?
2459  # If a file is checked in under UN*X, and cvs log run on windoze, which way
2460  # do the path separators slope?  Can we use fileparse as per the local
2461  # conventions?  If so, we should probably have a user option to specify an
2462  # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid
2463  # the nasty \-/ transmogrification below.
2464
2465  my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
2466
2467  # Transmogrify Windows filenames to look like Unix.
2468  # (It is far more likely that someone is running cvs2cl.pl under
2469  # Windows than that they would genuinely have backslashes in their
2470  # filenames.)
2471  tr!\\!/!
2472    for $dir1, $dir2;
2473
2474  my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
2475
2476  my @path1 = grep length($_), split qr!/!, $dir1;
2477  my @path2 = grep length($_), split qr!/!, $dir2;
2478
2479  my @common_path;
2480  for (0..min($#path1,$#path2)) {
2481    if ( $path1[$_] eq $path2[$_]) {
2482      push @common_path, $path1[$_];
2483    } else {
2484      last;
2485    }
2486  }
2487
2488  return join '', map "$_/", @common_path;
2489}
2490
2491# -------------------------------------
2492
2493sub parse_options {
2494  # Check this internally before setting the global variable.
2495  my $output_file;
2496
2497  # If this gets set, we encountered unknown options and will exit at
2498  # the end of this subroutine.
2499  my $exit_with_admonishment = 0;
2500
2501  # command to generate the log
2502  my @log_source_command = qw( cvs log );
2503
2504  my (@Global_Opts, @Local_Opts);
2505
2506  Getopt::Long::Configure(qw( bundling permute no_getopt_compat
2507                              pass_through no_ignore_case ));
2508  GetOptions('help|usage|h'   => \$Print_Usage,
2509             'debug'          => \$Debug,        # unadvertised option, heh
2510             'version'        => \$Print_Version,
2511
2512             'file|f=s'       => \$output_file,
2513             'accum'          => \$Cumulative,
2514             'update'         => \$Update,
2515             'fsf'            => \$FSF_Style,
2516             'rcs=s'          => \$RCS_Root,
2517             'usermap|U=s'    => \$User_Map_File,
2518             'gecos'          => \$Gecos,
2519             'domain=s'       => \$Domain,
2520             'passwd=s'       => \$User_Passwd_File,
2521             'window|W=i'     => \$Max_Checkin_Duration,
2522             'chrono'         => \$Chronological_Order,
2523             'ignore|I=s'     => \@Ignore_Files,
2524             'case-insensitive|C' => \$Case_Insensitive,
2525             'regexp|R=s'     => \$Regexp_Gate,
2526             'stdin'          => \$Input_From_Stdin,
2527             'stdout'         => \$Output_To_Stdout,
2528             'distributed|d'  => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
2529             'prune|P'        => \$Prune_Empty_Msgs,
2530             'no-wrap'        => \$No_Wrap,
2531             'gmt|utc'        => \$UTC_Times,
2532             'day-of-week|w'  => \$Show_Day_Of_Week,
2533             'revisions|r'    => \$Show_Revisions,
2534             'show-dead'      => \$Show_Dead,
2535             'tags|t'         => \$Show_Tags,
2536             'tagdates|T'     => \$Show_Tag_Dates,
2537             'branches|b'     => \$Show_Branches,
2538             'follow|F=s'     => \@Follow_Branches,
2539             'follow-only=s'  => \@Follow_Only,
2540             'xml-encoding=s' => \$XML_Encoding,
2541             'xml'            => \$XML_Output,
2542             'noxmlns'        => \$No_XML_Namespace,
2543             'no-xml-iso-date' => \$No_XML_ISO_Date,
2544             'no-ancestors'   => \$No_Ancestors,
2545             'lines-modified' => \$Show_Lines_Modified,
2546
2547             'no-indent'    => sub {
2548               $Indent = '';
2549             },
2550
2551             'summary'      => sub {
2552               $Summary = 1;
2553               $After_Header = "\n\n"; # Summary implies --separate-header
2554             },
2555
2556             'no-times'     => sub {
2557               $Show_Times = 0;
2558             },
2559
2560             'no-hide-branch-additions' => sub {
2561               $Hide_Branch_Additions = 0;
2562             },
2563
2564             'no-common-dir'  => sub {
2565               $Common_Dir = 0;
2566             },
2567
2568             'ignore-tag=s'   => sub {
2569               $ignore_tags{$_[1]} = 1;
2570             },
2571
2572             'show-tag=s'     => sub {
2573               $show_tags{$_[1]} = 1;
2574             },
2575
2576             # Deliberately undocumented.  This is not a public interface, and
2577             # may change/disappear at any time.
2578             'test-code=s'    => \$TestCode,
2579
2580             'delta=s'        => sub {
2581               my $arg = $_[1];
2582               if ( $arg =~
2583                    /^([A-Za-z][A-Za-z0-9_\-\]\[]*):([A-Za-z][A-Za-z0-9_\-\]\[]*)$/ ) {
2584                 $Delta_From = $1;
2585                 $Delta_To = $2;
2586                 $Delta_Mode = 1;
2587               } else {
2588                 die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
2589               }
2590             },
2591
2592             'FSF'             => sub {
2593               $Show_Times = 0;
2594               $Common_Dir = 0;
2595               $No_Extra_Indent = 1;
2596               $Indent = "\t";
2597             },
2598
2599             'header=s'        => sub {
2600               my $narg = $_[1];
2601               $ChangeLog_Header = &slurp_file ($narg);
2602               if (! defined ($ChangeLog_Header)) {
2603                 $ChangeLog_Header = '';
2604               }
2605             },
2606
2607             'global-opts|g=s' => sub {
2608               my $narg = $_[1];
2609               push @Global_Opts, $narg;
2610               splice @log_source_command, 1, 0, $narg;
2611             },
2612
2613             'log-opts|l=s' => sub {
2614               my $narg = $_[1];
2615               push @Local_Opts, $narg;
2616               push @log_source_command, $narg;
2617             },
2618
2619             'mailname=s'   => sub {
2620               my $narg = $_[1];
2621               warn "--mailname is deprecated; please use --domain instead\n";
2622               $Domain = $narg;
2623             },
2624
2625             'separate-header|S' => sub {
2626               $After_Header = "\n\n";
2627               $No_Extra_Indent = 1;
2628             },
2629
2630             'group-within-date' => sub {
2631               $GroupWithinDate = 1;
2632               $Show_Times = 0;
2633             },
2634
2635             'hide-filenames' => sub {
2636               $Hide_Filenames = 1;
2637               $After_Header = '';
2638             },
2639            )
2640    or die "options parsing failed\n";
2641
2642  push @log_source_command, map "$_", @ARGV;
2643
2644  ## Check for contradictions...
2645
2646  if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
2647    print STDERR "cannot pass both --stdout and --distributed\n";
2648    $exit_with_admonishment = 1;
2649  }
2650
2651  if ($Output_To_Stdout && $output_file) {
2652    print STDERR "cannot pass both --stdout and --file\n";
2653    $exit_with_admonishment = 1;
2654  }
2655
2656  if ($Input_From_Stdin && @Global_Opts) {
2657    print STDERR "cannot pass both --stdin and -g\n";
2658    $exit_with_admonishment = 1;
2659  }
2660
2661  if ($Input_From_Stdin && @Local_Opts) {
2662    print STDERR "cannot pass both --stdin and -l\n";
2663    $exit_with_admonishment = 1;
2664  }
2665
2666  if ($XML_Output && $Cumulative) {
2667    print STDERR "cannot pass both --xml and --accum\n";
2668    $exit_with_admonishment = 1;
2669  }
2670
2671  # Other consistency checks and option-driven logic
2672
2673  # Bleargh.  Compensate for a deficiency of custom wrapping.
2674  if ( ($After_Header ne " ") and $FSF_Style ) {
2675    $After_Header .= "\t";
2676  }
2677
2678  @Ignore_Files = map lc, @Ignore_Files
2679    if $Case_Insensitive;
2680
2681  # Or if any other error message has already been printed out, we
2682  # just leave now:
2683  if ($exit_with_admonishment) {
2684    &usage ();
2685    exit (1);
2686  }
2687  elsif ($Print_Usage) {
2688    &usage ();
2689    exit (0);
2690  }
2691  elsif ($Print_Version) {
2692    &version ();
2693    exit (0);
2694  }
2695
2696  ## Else no problems, so proceed.
2697
2698  if ($output_file) {
2699    $Log_File_Name = $output_file;
2700  }
2701
2702  return \@log_source_command;
2703}
2704
2705# -------------------------------------
2706
2707sub slurp_file {
2708  my $filename = shift || die ("no filename passed to slurp_file()");
2709  my $retstr;
2710
2711  open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
2712  local $/ = undef;
2713  $retstr = <SLURPEE>;
2714  close (SLURPEE);
2715  return $retstr;
2716}
2717
2718# -------------------------------------
2719
2720sub debug {
2721  if ($Debug) {
2722    my $msg = shift;
2723    print STDERR $msg;
2724  }
2725}
2726
2727# -------------------------------------
2728
2729sub version {
2730  print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
2731}
2732
2733# -------------------------------------
2734
2735sub usage {
2736  &version ();
2737
2738  eval "use Pod::Usage qw( pod2usage )";
2739
2740   if ( $@ ) {
2741    print <<'END';
2742
2743* Pod::Usage was not found.  The formatting may be suboptimal.  Consider
2744  upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
2745  versions of perl prior to 5.6 are getting rather rusty, now.  Alternatively,
2746  install Pod::Usage direct from CPAN.
2747END
2748
2749    local $/ = undef;
2750    my $message = <DATA>;
2751    $message =~ s/^=(head1|item) //gm;
2752    $message =~ s/^=(over|back).*\n//gm;
2753    $message =~ s/\n{3,}/\n\n/g;
2754    print $message;
2755  } else {
2756    print "\n";
2757    pod2usage( -exitval => 'NOEXIT',
2758               -verbose => 1,
2759               -output  => \*STDOUT,
2760             );
2761  }
2762
2763  return;
2764}
2765
2766# Main -----------------------------------------------------------------------
2767
2768my $log_source_command = parse_options;
2769if ( defined $TestCode ) {
2770  eval $TestCode;
2771  die "Eval failed: '$@'\n"
2772    if $@;
2773} else {
2774  derive_changelog($log_source_command);
2775}
2776
2777__DATA__
2778
2779=head1 NAME
2780
2781cvs2cl.pl - convert cvs log messages to changelogs
2782
2783=head1 SYNOPSIS
2784
2785B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2786
2787=head1 DESCRIPTION
2788
2789cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2790running "cvs log" and parsing the output. Duplicate log messages get
2791unified in the Right Way.
2792
2793The default output of cvs2cl is designed to be compact, formally unambiguous,
2794but still easy for humans to read.  It should be largely self-explanatory; the
2795one abbreviation that might not be obvious is "utags".  That stands for
2796"universal tags" -- a universal tag is one held by all the files in a given
2797change entry.
2798
2799If you need output that's easy for a program to parse, use the B<--xml> option.
2800Note that with XML output, just about all available information is included
2801with each change entry, whether you asked for it or not, on the theory that
2802your parser can ignore anything it's not looking for.
2803
2804If filenames are given as arguments cvs2cl only shows log information for the
2805named files.
2806
2807=head1 OPTIONS
2808
2809=over 4
2810
2811=item B<-h>, B<-help>, B<--help>, B<-?>
2812
2813Show a short help and exit.
2814
2815=item B<--version>
2816
2817Show version and exit.
2818
2819=item B<-r>, B<--revisions>
2820
2821Show revision numbers in output.
2822
2823=item B<-b>, B<--branches>
2824
2825Show branch names in revisions when possible.
2826
2827=item B<-t>, B<--tags>
2828
2829Show tags (symbolic names) in output.
2830
2831=item B<-T>, B<--tagdates>
2832
2833Show tags in output on their first occurance.
2834
2835=item B<--show-dead>
2836
2837Show dead files.
2838
2839=item B<--stdin>
2840
2841Read from stdin, don't run cvs log.
2842
2843=item B<--stdout>
2844
2845Output to stdout not to ChangeLog.
2846
2847=item B<-d>, B<--distributed>
2848
2849Put ChangeLogs in subdirs.
2850
2851=item B<-f> I<FILE>, B<--file> I<FILE>
2852
2853Write to I<FILE> instead of ChangeLog.
2854
2855=item B<--fsf>
2856
2857Use this if log data is in FSF ChangeLog style.
2858
2859=item B<--FSF>
2860
2861Attempt strict FSF-standard compatible output.
2862
2863=item B<-W> I<SECS>, B<--window> I<SECS>
2864
2865Window of time within which log entries unify.
2866
2867=item -B<U> I<UFILE>, B<--usermap> I<UFILE>
2868
2869Expand usernames to email addresses from I<UFILE>.
2870
2871=item B<--passwd> I<PASSWORDFILE>
2872
2873Use system passwd file for user name expansion.  If no mail domain is provided
2874(via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
2875-d>, B<dnsdomainname>, or B<domain-name>.  cvs2cl exits with an error if none of
2876those options is successful. Use a domain of '' to prevent the addition of a
2877mail domain.
2878
2879=item B<--domain> I<DOMAIN>
2880
2881Domain to build email addresses from.
2882
2883=item B<--gecos>
2884
2885Get user information from GECOS data.
2886
2887=item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2888
2889Include only entries that match I<REGEXP>.  This option may be used multiple
2890times.
2891
2892=item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2893
2894Ignore files whose names match I<REGEXP>.  This option may be used multiple
2895times.  The regexp is a perl regular expression.  It is matched as is; you may
2896want to prefix with a ^ or suffix with a $ to anchor the match.
2897
2898=item B<-C>, B<--case-insensitive>
2899
2900Any regexp matching is done case-insensitively.
2901
2902=item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2903
2904Show only revisions on or ancestral to I<BRANCH>.
2905
2906=item B<--follow-only> I<BRANCH>
2907
2908Like --follow, but sub-branches are not followed.
2909
2910=item B<--no-ancestors>
2911
2912When using B<-F>, only track changes since the I<BRANCH> started.
2913
2914=item B<--no-hide-branch-additions>
2915
2916By default, entries generated by cvs for a file added on a branch (a dead 1.1
2917entry) are not shown.  This flag reverses that action.
2918
2919=item B<-S>, B<--separate-header>
2920
2921Blank line between each header and log message.
2922
2923=item B<--summary>
2924
2925Add CVS change summary information.
2926
2927=item B<--no-wrap>
2928
2929Don't auto-wrap log message (recommend B<-S> also).
2930
2931=item B<--no-indent>
2932
2933Don't indent log message
2934
2935=item B<--gmt>, B<--utc>
2936
2937Show times in GMT/UTC instead of local time.
2938
2939=item B<--accum>
2940
2941Add to an existing ChangeLog (incompatible with B<--xml>).
2942
2943=item B<-w>, B<--day-of-week>
2944
2945Show day of week.
2946
2947=item B<--no-times>
2948
2949Don't show times in output.
2950
2951=item B<--chrono>
2952
2953Output log in chronological order (default is reverse chronological order).
2954
2955=item B<--header> I<FILE>
2956
2957Get ChangeLog header from I<FILE> ("B<->" means stdin).
2958
2959=item B<--xml>
2960
2961Output XML instead of ChangeLog format.
2962
2963=item B<--xml-encoding> I<ENCODING.>
2964
2965Insert encoding clause in XML header.
2966
2967=item B<--noxmlns>
2968
2969Don't include xmlns= attribute in root element.
2970
2971=item B<--hide-filenames>
2972
2973Don't show filenames (ignored for XML output).
2974
2975=item B<--no-common-dir>
2976
2977Don't shorten directory names from filenames.
2978
2979=item B<--rcs> I<CVSROOT>
2980
2981Handle filenames from raw RCS, for instance those produced by "cvs rlog"
2982output, stripping the prefix I<CVSROOT>.
2983
2984=item B<-P>, B<--prune>
2985
2986Don't show empty log messages.
2987
2988=item B<--lines-modified>
2989
2990Output the number of lines added and the number of lines removed for
2991each checkin (if applicable). At the moment, this only affects the
2992XML output mode.
2993
2994=item B<--ignore-tag> I<TAG>
2995
2996Ignore individual changes that are associated with a given tag.
2997May be repeated, if so, changes that are associated with any of
2998the given tags are ignored.
2999
3000=item B<--show-tag> I<TAG>
3001
3002Log only individual changes that are associated with a given
3003tag.  May be repeated, if so, changes that are associated with
3004any of the given tags are logged.
3005
3006=item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
3007
3008Attempt a delta between two tags (since I<FROM_TAG> up to and
3009including I<TO_TAG>).  The algorithm is a simple date-based one
3010(this is a hard problem) so results are imperfect.
3011
3012=item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
3013
3014Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
3015
3016=item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
3017
3018Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
3019
3020=back
3021
3022Notes about the options and arguments:
3023
3024=over 4
3025
3026=item *
3027
3028The B<-I> and B<-F> options may appear multiple times.
3029
3030=item *
3031
3032To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works).  This is
3033okay because no would ever, ever be crazy enough to name a branch "trunk",
3034right?  Right.
3035
3036=item *
3037
3038For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
3039each line of I<UFILE> looks like this:
3040
3041       jrandom:jrandom@red-bean.com
3042
3043or maybe even like this
3044
3045       jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
3046
3047Don't forget to quote the portion after the colon if necessary.
3048
3049=item *
3050
3051Many people want to filter by date.  To do so, invoke cvs2cl.pl like this:
3052
3053       cvs2cl.pl -l "-d'DATESPEC'"
3054
3055where DATESPEC is any date specification valid for "cvs log -d".  (Note that
3056CVS 1.10.7 and below requires there be no space between -d and its argument).
3057
3058=item *
3059
3060Dates/times are interpreted in the local time zone.
3061
3062=item *
3063
3064Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
3065spaces as argument separators.
3066
3067=item *
3068
3069See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
3070systems) for more information.
3071
3072=item *
3073
3074Note that the rules for quoting under windows shells are different.
3075
3076=item *
3077
3078To run in an automated environment such as CGI or PHP, suidperl may be needed
3079in order to execute as the correct user to enable /cvsroot read lock files to
3080be written for the 'cvs log' command.  This is likely just a case of changing
3081the /usr/bin/perl command to /usr/bin/suidperl, and explicitly declaring the
3082PATH variable.
3083
3084=back
3085
3086=head1 EXAMPLES
3087
3088Some examples (working on UNIX shells):
3089
3090      # logs after 6th March, 2003 (inclusive)
3091      cvs2cl.pl -l "-d'>2003-03-06'"
3092      # logs after 4:34PM 6th March, 2003 (inclusive)
3093      cvs2cl.pl -l "-d'>2003-03-06 16:34'"
3094      # logs between 4:46PM 6th March, 2003 (exclusive) and
3095      # 4:34PM 6th March, 2003 (inclusive)
3096      cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
3097
3098Some examples (on non-UNIX shells):
3099
3100      # Reported to work on windows xp/2000
3101      cvs2cl.pl -l  "-d"">2003-10-18;today<"""
3102
3103=head1 AUTHORS
3104
3105=over 4
3106
3107=item Karl Fogel
3108
3109=item Melissa O'Neill
3110
3111=item Martyn J. Pearce
3112
3113=back
3114
3115Contributions from
3116
3117=over 4
3118
3119=item Mike Ayers
3120
3121=item Tim Bradshaw
3122
3123=item Richard Broberg
3124
3125=item Nathan Bryant
3126
3127=item Oswald Buddenhagen
3128
3129=item Neil Conway
3130
3131=item Arthur de Jong
3132
3133=item Mark W. Eichin
3134
3135=item Dave Elcock
3136
3137=item Reid Ellis
3138
3139=item Simon Josefsson
3140
3141=item Robin Hugh Johnson
3142
3143=item Terry Kane
3144
3145=item Pete Kempf
3146
3147=item Akos Kiss
3148
3149=item Claus Klein
3150
3151=item Eddie Kohler
3152
3153=item Richard Laager
3154
3155=item Kevin Lilly
3156
3157=item Karl-Heinz Marbaise
3158
3159=item Mitsuaki Masuhara
3160
3161=item Henrik Nordstrom
3162
3163=item Joe Orton
3164
3165=item Peter Palfrader
3166
3167=item Thomas Parmelan
3168
3169=item Jordan Russell
3170
3171=item Jacek Sliwerski
3172
3173=item Johannes Stezenbach
3174
3175=item Joseph Walton
3176
3177=item Ernie Zapata
3178
3179=back
3180
3181=head1 BUGS
3182
3183Please report bugs to C<bug-cvs2cl@red-bean.com>.
3184
3185=head1 PREREQUISITES
3186
3187This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>.  It
3188also seems to require C<Perl 5.004_04> or higher.
3189
3190=head1 OPERATING SYSTEM COMPATIBILITY
3191
3192Should work on any OS.
3193
3194=head1 SCRIPT CATEGORIES
3195
3196Version_Control/CVS
3197
3198=head1 COPYRIGHT
3199
3200(C) 2001,2002,2003,2004 Martyn J. Pearce <fluffy@cpan.org>, under the GNU GPL.
3201
3202(C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.
3203
3204cvs2cl.pl is free software; you can redistribute it and/or modify
3205it under the terms of the GNU General Public License as published by
3206the Free Software Foundation; either version 2, or (at your option)
3207any later version.
3208
3209cvs2cl.pl is distributed in the hope that it will be useful,
3210but WITHOUT ANY WARRANTY; without even the implied warranty of
3211MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3212GNU General Public License for more details.
3213
3214You may have received a copy of the GNU General Public License
3215along with cvs2cl.pl; see the file COPYING.  If not, write to the
3216Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3217Boston, MA 02111-1307, USA.
3218
3219=head1 SEE ALSO
3220
3221cvs(1)
3222
3223