1#!/usr/bin/env python
2
3#    Copyright (C) 2000 Aladdin Enterprises.  All rights reserved.
4#
5# This program is free software; you can redistribute it and/or modify it
6# under the terms of the GNU General Public License as published by the
7# Free Software Foundation; either version 2 of the License, or (at your
8# option) any later version.
9#
10# This program is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
13# Public License for more details.
14#
15# You should have received a copy of the GNU General Public License along
16# with this program; if not, write to the Free Software Foundation, Inc.,
17# 59 Temple Place, Suite 330, Boston, MA, 02111-1307.
18
19# $Id: cvs2hist.py,v 1.9.2.2.2.1 2003/04/12 14:02:39 giles Exp $
20
21# Convert the change notices in a CVS repository to the Ghostscript
22# History.htm file format.  Based on cvs2log.py by Henry Stiles
23# <henrys@artifex.com>.
24
25# Typical usage is: cvs2hist.py -j gs7_03 -v 7.04 > doc/Changes.htm.
26# The -j argument specifies a reference revision tag. This selects
27# log messages on the current branch (as determined by cvs status)
28# that are more recent than the reference tag.
29
30# ---------------- Generic utilities ---------------- #
31
32# Convert a date/time string in RCS format (yyyy/mm/dd hh:mm:ss) to
33# a time in seconds since the epoch.  Note that the result is local time,
34# since that is what the only available function (mktime) returns.
def RCSDateToSeconds(date):
    """Convert an RCS date/time string to seconds since the epoch.

    date has the form 'yyyy/mm/dd hh:mm:ss'.  A '-' is accepted as the
    date separator as well, and any fields following the time (for
    instance a zone offset) are ignored.

    The broken-down fields are handed to time.mktime with the DST flag
    set to -1, so they are interpreted as local time.

    Returns the float produced by time.mktime.
    Raises ValueError if a field is missing or is not a number.
    """
    import time
    fields = date.split()
    if len(fields) < 2:
        raise ValueError("malformed RCS date: " + repr(date))
    (year, month, day) = fields[0].replace('-', '/').split('/')
    (hour, minute, second) = fields[1].split(':')
    # Weekday and day-of-year are ignored by mktime; -1 lets the C library
    # decide whether daylight saving time applies.
    broken_down = (int(year), int(month), int(day),
                   int(hour), int(minute), int(second),
                   0, 0, -1)
    return time.mktime(broken_down)
44
45# Create line-broken text from a list of items to be blank-separated.
def LineBrokenText(indent, line_length, items):
    """Join items with single blanks, breaking lines at line_length.

    indent      -- number of blanks placed at the start of every line
    line_length -- an item is moved to a new line when appending it
                   (plus its separating blank) would pass this column
    items       -- non-empty list of non-empty strings; the first one may
                   begin with a tab (counted as 8 columns) or a newline
                   (counted as 0 columns)

    Returns the assembled text terminated by a newline.  The column
    bookkeeping does not include the indent itself.
    """
    margin = ' ' * indent
    # Start one column "before" the line so that the separator emitted in
    # front of the very first item is not counted.
    column = -1
    lead = items[0][0]
    if lead == '\t':
        column = column + 7  # a tab occupies 8 columns but has len() 1
    elif lead == '\n':
        column = column - 1  # a newline occupies 0 columns but has len() 1
    pieces = [margin]
    for word in items:
        width = len(word)
        if column + 1 + width <= line_length:
            pieces.append(' ' + word)
            column = column + 1 + width
        else:
            pieces.append('\n' + margin + word)
            column = width
    # The first character is the separator that was emitted before the
    # first item; drop it.
    return ''.join(pieces)[1:] + '\n'
64
65# 'Normalize' the text of a named anchor to comply with spec
def NormalizeAnchor(name):
    """Return name with every character outside [0-9a-zA-Z_.-] turned
    into an underscore, for use as the text of a named anchor.
    """
    import re
    # Raw string, with the hyphen placed last so it cannot be read as a
    # range operator; the set of accepted characters is unchanged.
    return re.sub(r'[^0-9a-zA-Z_.-]', '_', name)
69
70# replace special characters with html entities
71# FIXME: isn't there a library call for this?
def HTMLEncode(line):
    """Return line with '&', '<' and '>' replaced by HTML entities."""
    # '&' must be handled first so that the ampersands introduced by the
    # other two replacements are not escaped a second time.
    line = line.replace('&', '&amp;')
    line = line.replace('<', '&lt;')
    line = line.replace('>', '&gt;')
    return line
78
79# ---------------- CVS-specific code ---------------- #
80
81# Return the CVS repository root directory (the argument for -d in CVS
82# commands).
def GetCVSRepository():
    """Return the first line of the file CVS/Root in the current directory.

    The trailing newline, if there is one, is removed.  If the file
    cannot be opened, an error message is written to standard output and
    None is returned.
    """
    import sys
    try:
        fp = open('CVS/Root', 'r')
    except IOError:
        sys.stdout.write("Error: Cannot find CVS/Root\n")
        return None
    try:
        repos = fp.readline()
    finally:
        fp.close()
    # Remove only a real line terminator, so a file that lacks a final
    # newline does not lose the last character of the root.
    if repos[-1:] == '\n':
        repos = repos[:-1]
    return repos
93
94# Scan int.mak and lib.mak to find source files associated with the
95# interpreter and library respectively.
def ScanMakefileForSources(filename, dict, value):
    """Record the C source/header names mentioned in a makefile.

    filename -- makefile to read; if it cannot be opened, 'gs/' + filename
                is tried as well
    dict     -- dictionary to update (modified in place)
    value    -- value stored for every file name found

    For each line, the first word that looks like name.c or name.h is
    entered as dict['src/' + word] = value.  Returns dict.  If neither
    path can be opened, an error message is written to standard output
    and dict is returned unchanged.
    """
    import re, sys

    makefile = None
    for candidate in (filename, 'gs/' + filename):
        try:
            makefile = open(candidate, 'r')
        except IOError:
            continue  # try the next candidate; reported below if none work
        break
    if makefile is None:
        sys.stdout.write("Error: Unable to open " + filename + "\n")
        return dict
    try:
        lines = makefile.readlines()
    finally:
        makefile.close()
    pattern = re.compile("[a-zA-Z][a-zA-Z0-9_]+[.][ch]")
    for line in lines:
        found = pattern.search(line)
        if found is not None:
            dict['src/' + found.group()] = value
    return dict
115
116# Classify a source file name according to what group it should go in.
117# This is very specific to Ghostscript.
118# Eventually we will replace this with separate subdirectories.
119import re
# Ordered table of (compiled pattern, group name).  The first pattern that
# matches a file name decides its group, so more specific patterns must
# precede more general ones.  It is built as a real list so that it can be
# scanned any number of times.
SourceGroupPatterns = [(re.compile(_regex), _label) for _regex, _label in [
    # Note that Python regex left-anchors the match: an explicit .* is
    # needed for a floating match.
    ("^doc/", "Documentation"),
    ("^examples/", "Interpreter"),
    ("^man/", "Documentation"),
    ("^toolbin/", "Procedures"),
    ("^lib/pdf_.*[.]ps$", "PDF Interpreter"),
    ("^lib/gs_.*[.]ps$", "Interpreter"),
    ("^lib/ht_.*[.]ps$", "Interpreter"),
    ("^lib/.*[.]upp$", "Drivers"),
    ("^lib/.*[.]x[bp]m$", "Interpreter"),
    ("^lib/", "Utilities"),
    ("^src/.*[.]bat$", "Procedures"),
    ("^src/.*[.]cfg$", "Procedures"),
    ("^src/.*[.]com$", "Procedures"),
    ("^src/.*[.]cmd$", "Procedures"),
    ("^src/.*[.]def$", "Procedures"),
    ("^src/.*[.]m[am]k$", "Procedures"),
    ("^src/.*[.]rc$", "Procedures"),
    ("^src/.*[.]rps$", "Procedures"),
    ("^src/.*[.]sh$", "Utilities"),
    ("^src/gdev", "Drivers"),
    ("^src/gen", "Utilities"),
    ("^src/rpm", "Procedures"),
    ("^src/[^.]*$", "Utilities"),
    ("^src/d[pw]", "Interpreter"),
    ("^src/.*[.]cpp$", "Interpreter"),
    ("^src/.*[.]c$", "Utilities"),
    ("", "Other"),    # This pattern must appear at the end.
    ]]
def SourceFileGroup(filename, sources):
    """Return the name of the group that filename belongs to.

    filename -- file name relative to the top of the tree (e.g. 'src/x.c')
    sources  -- dictionary mapping file names to group names; an entry
                here takes precedence over the pattern table

    Otherwise the group of the first matching entry in
    SourceGroupPatterns is returned; the final, empty pattern matches
    every name.
    """
    try:
        return sources[filename]
    except KeyError:
        pass  # not listed explicitly: fall back to the pattern table
    for pattern, group in SourceGroupPatterns:
        if pattern.match(filename) is not None:
            return group
157
158# Create a version TOC.
def VersionTOC(version, version_date, groups):
    """Return an HTML list holding one link per group.

    Each link points at the anchor NormalizeAnchor(version + '-' + group).
    The links are separated by commas, one per line; version_date is not
    used.
    """
    links = ['<a href="#' + NormalizeAnchor(version + '-' + name) + '">' + name + '</a>'
             for name in groups]
    # Continuation lines are indented by four blanks.
    return '<ul>\n<li>' + ',\n    '.join(links) + '\n</ul>\n'
165
166# Create a change log group header.
def ChangeLogGroupHeader(group, previous_group, version):
    """Return the HTML that starts the section for group.

    The heading carries a named anchor derived from version and group and
    opens a <pre> element.  When previous_group is not None, the <pre>
    opened for that group is closed first.
    """
    anchor = NormalizeAnchor(version + '-' + group)
    heading = '<h2><a name="' + anchor + '"></a>' + group + '</h2><pre>'
    if previous_group is None:
        return '\n' + heading
    return '\n</pre>' + heading
172
173# Create a change log section header.
174# Section 0 = fixes, section 1 = other.
175# Return (section header, line prefix)
def ChangeLogSectionHeader(section, previous_section, version):
    """Return (header text or None, prefix for the first line of an entry).

    Section 0 holds fixes and gets a header; every other section has no
    header.  previous_section and version are not used.
    """
    if section != 0:
        return (None, "\n")
    return ("\nFixes problems:", "\t- ")
180
181# Build the text for a patch.  (Not really implemented yet.)
def BuildPatch(cvs_command, revision, rcs_file):
    """Return the output of 'cvs diff' between revision and its predecessor.

    cvs_command -- command (plus any global options) used to run CVS
    revision    -- revision number; only the two-component form 'n.m'
                   with m > 1 is handled
    rcs_file    -- file to diff

    Returns the list of output lines of the diff command.  For any
    revision that is not handled (branch revisions, non-numeric parts,
    or no predecessor) the string "the patch must be created manually"
    is returned instead.
    """
    import os
    manual = "the patch must be created manually"
    # NB this needs work we only handle the special cases here.
    parts = revision.split('.')
    if len(parts) != 2:
        return manual
    try:
        prev_frac_int = int(parts[1]) - 1
    except ValueError:
        return manual
    if prev_frac_int < 1:
        # n.1 would give n.0 as its predecessor.
        return manual
    prev_revision = parts[0] + '.' + str(prev_frac_int)
    # NOTE(review): the newer revision is passed first, which looks like it
    # yields a reverse diff -- confirm this is intended.
    patch_command = cvs_command + ' diff -C2 -r' + revision + ' -r' + prev_revision + ' ' + rcs_file
    pipe = os.popen(patch_command, 'r')
    try:
        return pipe.readlines()
    finally:
        pipe.close()
194
195# Create an individual history entry.
def ChangeLogEntry(cvs_command, author, date, rev_files, description_lines, prefix, indent, line_length, patch, text_option):
    """Format one history entry and return it as line-broken text.

    cvs_command       -- CVS command, used only when patches are requested
    author, date      -- appended to the entry after the file list
    rev_files         -- non-empty list of (revision, file name) pairs
    description_lines -- log message lines, each terminated with '\\n'
    prefix            -- text put in front of the first word
    indent            -- accepted but not used: the text is always broken
                         with an indent of 0
    line_length       -- line length handed to LineBrokenText
    patch             -- 1 to append the output of BuildPatch for each file
    text_option       -- 0 to HTML-encode the description, otherwise the
                         description is used as is

    The entry consists of the description words, then the sorted list
    '(file [rev], ... file [rev]:', then the date and 'author)'.  A
    leading 'src/' is dropped from file names.
    """
    # Add the description.  Each line loses its final character (the
    # newline) and the lines are joined with blanks.
    description = ' '.join([line[:-1] for line in description_lines])
    if text_option == 0:
        description = HTMLEncode(description)
    entry = description.split()
    if not entry:
        # An empty description still needs a word to carry the prefix.
        entry = ['']
    entry[0] = prefix + entry[0]
    # Add the list of RCS files and revisions.
    items = []
    for revision, rcs_file in rev_files:
        if rcs_file[:4] == 'src/':
            rcs_file = rcs_file[4:]
        items.append(rcs_file + ' [' + revision + ']' + ',')
    items.sort()
    items[0] = '(' + items[0]
    items[-1] = items[-1][:-1] + ':'  # the last comma becomes a colon
    # Add the date and author.
    entry = entry + items + date.split() + [author + ')']
    entry = LineBrokenText(0, line_length, entry)
    # Add on the patches if necessary.
    if patch == 1:
        pieces = [entry]
        for revision, rcs_file in rev_files:
            pieces.extend(BuildPatch(cvs_command, revision, rcs_file))
        entry = ''.join(pieces)
    return entry
225
226# Find the current revision for each file in the current local copy.
def GetCurrentRevisions(status_command, cvs_repository):
    """Map each file in the local copy to its repository revision.

    status_command -- shell command whose output is scanned for lines of
                      the form 'Repository revision: REV PATH,v'
    cvs_repository -- CVS root; the part after the last ':' is taken as
                      the repository directory

    Returns a dictionary from file name to revision string.  The file
    name is PATH with the repository directory and the one path component
    after it removed.
    """
    import os, re

    cvs_path = cvs_repository.split(':')[-1]
    # Number of leading path components to discard: those of the
    # repository directory (including the empty one before the first '/')
    # plus one more.
    cvs_dir_n = len(cvs_path.split('/')) + 1
    rev_re = re.compile(r'\s*Repository revision:\s+(\S+)\s+(\S+),v')
    current_revisions = {}
    status_file = os.popen(status_command, 'r')
    try:
        while 1:
            line = status_file.readline()
            if line == '':
                break
            m = rev_re.match(line)
            if m:
                fn = '/'.join(m.group(2).split('/')[cvs_dir_n:])
                current_revisions[fn] = m.group(1)
    finally:
        status_file.close()
    return current_revisions
245
246# Return value: true if rev2 is a later revision derived from rev1.
def RevisionLater(rev1, rev2):
    """Return true if rev2 equals rev1 or is a later revision derived
    from it.

    Both arguments are dotted revision numbers such as '1.9.2.2'.
    Components are compared numerically from the left:
      - rev2 with fewer components than rev1 is never later;
      - a smaller component in rev2 means it is not later;
      - a larger component counts only in the last position of rev1;
      - if all of rev1's components are matched, the result is true.
    """
    rev1_l = rev1.split('.')
    rev2_l = rev2.split('.')

    if len(rev2_l) < len(rev1_l):
        return 0
    last = len(rev1_l) - 1
    for i in range(len(rev1_l)):
        n1 = int(rev1_l[i])
        n2 = int(rev2_l[i])
        if n2 < n1:
            return 0
        if n2 > n1:
            return i == last
    return 1
259
260# Build the combined CVS log.  We return an array of tuples of
261# (date, author, description, rcs_file, revision, tags, ref_rev).
262# The date is just a string in RCS format (yyyy/mm/dd hh:mm:ss).
263# The description is a sequence of text lines, each terminated with \n.
def BuildLog(log_date_command, ref_tag):
    """Run a 'cvs log' command and collect one record per logged revision.

    log_date_command -- shell command whose output is parsed
    ref_tag          -- name of the reference tag, or None

    Returns a list of tuples
        (date, author, description, rcs_file, revision, tags, ref_rev)
    where date is the text of the 'date:' field, description is the list
    of message lines (each ending in '\\n'), tags lists the symbolic names
    attached to the revision, and ref_rev is the revision that ref_tag
    names in that file (None if the file does not carry the tag).

    A record is emitted when a line starting with '-----' or '=====' is
    reached after a non-empty description has been read.
    """
    import os, re

    reading_description = 0
    reading_tags = 0
    description = []
    log = []
    # A symbolic name line: tab, tag name, ': ', revision number.
    tag_pattern = re.compile("^\t([^:]+): ([0-9.]+)\n$")
    branches_re = re.compile("^branches:")

    log_file = os.popen(log_date_command, 'r')
    try:
        while 1:
            line = log_file.readline()
            if line == '':
                break
            if line[:5] == '=====' or line[:5] == '-----':
                # A separator ends the description being read, if any.
                if description != []:
                    my_tags = tags.get(revision, [])
                    log.append((date, author, description, rcs_file, revision, my_tags, ref_rev))
                reading_description = 0
                description = []
                continue
            if reading_description:
                # Omit initial empty description lines and branches info.
                if description == [] and (line == '\n' or branches_re.match(line)):
                    continue
                description.append(line)
                continue
            if reading_tags:
                match = tag_pattern.match(line)
                if match is None:
                    # First line that is not a tag ends the tag list.
                    reading_tags = 0
                    continue
                tag = match.group(1)
                revs = match.group(2).split(", ")
                if tag == ref_tag:
                    ref_rev = revs[0]
                for rev in revs:
                    tags.setdefault(rev, []).append(tag)
                continue
            if line.startswith("Working file: "):
                # A new file begins: forget the previous file's tags.
                rcs_file = line[len("Working file: "):-1]
                tags = {}
                ref_rev = None
            elif line.startswith("revision "):
                revision = line[len("revision "):-1]
            elif line.startswith("date: "):
                # Only the first two ';'-separated fields are needed, so
                # any number of further fields is tolerated.
                fields = line.split(';')
                date = fields[0].split(': ', 1)[1]
                author = fields[1].split(': ', 1)[1]
                reading_description = 1
            elif line.startswith("symbolic names:"):
                reading_tags = 1
    finally:
        log_file.close()

    return log
325
326# ---------------- Main program ---------------- #
327
328# make sure the group names normalize to distinct anchors!
# Position of each group in the generated history; lower numbers are
# printed first.
GroupOrder = {
    'Documentation': 1,
    'Procedures': 2,
    'Utilities': 3,
    'Drivers': 4,
    'Platforms': 5,
    'Fonts': 6,
    'PDF writer': 7,
    'PDF Interpreter': 8,
    'Interpreter': 9,
    'Streams': 10,
    'Library': 11,
    'Other': 12,
}
343
344# Parse command line options and build logs.
def main():
    """Parse the command line, read the CVS log and print the history.

    Options:
      -C cmd   CVS command to run (default "cvs")
      -d date  passed to 'cvs log' as '-d>date'
      -i n     indent, handed to ChangeLogEntry
      -l n     line length (default 76)
      -M       merge: put every entry in the single group "(all)"
      -p       append patches to the entries
      -t       plain text output instead of HTML
      -r opts  extra options appended to the 'cvs log' command
      -v ver   version label used in the title and anchors (default "CVS")
      -j tag   reference tag; revisions not later than it are omitted

    Exits with status 2 on a usage error or when CVS/Root is not found.
    """
    import sys, getopt, time, re

    # Every line of output is written as text + '\n'.
    out = sys.stdout.write

    try:
        # The long option names are accepted by getopt but none of them is
        # acted on in the loop below.
        opts, args = getopt.getopt(sys.argv[1:], "C:d:Hi:l:Mptr:v:j:",
                                   ["CVS_command",
                                    "date",
                                    "indent",
                                    "length",
                                    "Merge",
                                    "patches",     #### not yet supported
                                    "text",
                                    "rlog_options", #### not yet supported
                                    "version"
                                    ])
    except getopt.error:
        msg = sys.exc_info()[1]
        err = sys.stderr.write
        err(str(msg) + '\n')
        err("Usage: cvs2hist ...options...\n")
        err("Options: [-C CVS_command] [-d rcs_date] [-i indent] [-l length]\n")
        err("         [-M] [-p] [-t] [-r rlog_options] [-v version]\n")
        err("         [-j tag]\n")
        sys.exit(2)

    # Set up defaults for all of the command line options.
    cvs_repository = GetCVSRepository()
    if not cvs_repository:
        out("cvs2hist must be executed in a working CVS directory\n")
        sys.exit(2)
    cvs_command = "cvs"
    date_option = ""
    indent = 0
    length = 76
    merge = 0
    patches = 0
    rlog_options = ""
    text_option = 0
    ref_tag = None
    version = "CVS"
    # override defaults if specified on the command line
    for o, a in opts:
        if o == '-C': cvs_command = a
        elif o == '-d': date_option = "'-d>" + a + "'"
        elif o == '-i': indent = int(a)
        elif o == '-l': length = int(a)
        elif o == '-M': merge = 1
        elif o == '-p': patches = 1
        elif o == '-t': text_option = 1
        elif o == '-r': rlog_options = a
        elif o == '-v': version = a
        elif o == '-j': ref_tag = a
        else: out("getopt should have failed already\n")

    status_command = cvs_command + ' -d ' + cvs_repository + ' -Q status'
    cur_revisions = GetCurrentRevisions(status_command, cvs_repository)
    # set up the cvs log command arguments.
    log_date_command = cvs_command + ' -d ' + cvs_repository + ' -Q log ' + date_option + ' ' + rlog_options
    # Acquire the log data.
    log = BuildLog(log_date_command, ref_tag)
    # Scan the makefiles to find source file names.
    sources = {}
    sources = ScanMakefileForSources('src/lib.mak', sources, "Library")
    for key in list(sources.keys()):
        if key[:5] == 'src/s':
            sources[key] = "Streams"
    sources = ScanMakefileForSources('src/devs.mak', sources, "Drivers")
    for key in list(sources.keys()):
        if key[:10] == 'src/gdevpd':
            sources[key] = "PDF writer"
    sources = ScanMakefileForSources('src/int.mak', sources, "Interpreter")
    sources = ScanMakefileForSources('src/contrib.mak', sources, "Drivers")
    # Sort the log by group, then by fix/non-fix, then by date, then by
    # description (to group logically connected files together).
    sorter = []
    group_pattern = re.compile(r"^(\([^)]+\))[ ]+")
    for date, author, text_lines, rcs_file, revision, tags, ref_rev in log:
        current = cur_revisions.get(rcs_file)
        if current is None:
            continue  # file is not part of the local copy
        if not RevisionLater(revision, current):
            continue  # revision is not on the path to the current one
        if ref_rev and (ref_rev == revision or not RevisionLater(ref_rev, revision)):
            continue  # not more recent than the reference tag
        # Drop leading blank lines from the description.
        while text_lines and text_lines[0].strip() == '':
            del text_lines[0]
        if not text_lines:
            # Nothing but blank lines: substitute a placeholder so that
            # the code below always has a first line to work with.
            # NOTE(review): wording mirrors the text RCS conventionally
            # records for an empty message -- confirm.
            text_lines.append('*** empty log message ***\n')
        line = text_lines[0].strip()
        if merge:
            group = "(all)"
        else:
            match = group_pattern.match(text_lines[0])
            if match is not None:
                # An explicit "(Group) " prefix names the group; remove it
                # and re-examine what is left for a "Fix:" marker.
                group = match.group(1)
                text_lines[0] = text_lines[0][len(match.group(0)):]
                line = text_lines[0].strip()
            else:
                group = SourceFileGroup(rcs_file, sources)
        group_order = GroupOrder.get(group, 99)
        if line[:4] == "Fix:":
            text_lines[0] = line[4:] + '\n'
            section = 0
        elif re.match("^Fix", text_lines[0]) is not None:
            section = 0
        else:
            section = 1
        sorter.append((group_order, section, date, group, text_lines, author, rcs_file, revision, tags))
    sorter.sort()
    log = sorter
    # Print the HTML header.
    time_now = time.localtime(time.time())
    version_date = time.strftime('%Y-%m-%d', time_now)
    version_time = time.strftime('%Y-%m-%d %H:%M:%S', time_now)
    if text_option == 0:
        out("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">" + '\n')
        out("<html><head>" + '\n')
        out("<title>Ghostscript " + version + " change history as of " + version_time + "</title>" + '\n')
        out("<link rel=stylesheet type=\"text/css\" href=\"gs.css\">" + '\n')
        out("</head><body>\n" + '\n')
        out('<!-- cvs command: ' + log_date_command + ' -->\n' + '\n')

        # Collect the groups in the order in which they will appear.
        last_group = None
        groups = []
        for record in log:
            group = record[3]
            if group != last_group:
                groups.append(group)
                last_group = group
        out(VersionTOC(version, version_date, groups) + '\n')
    else:
        out("Ghostscript change history as of " + version_time + '\n')
    # Pass through the logs creating new entries based on changing
    # authors, groups, dates and descriptions.
    last_group = None
    last_section = None
    last_description = None
    last_author = None
    last_date = None
    rev_files = []
    for omit_group_order, section, date, group, description, author, rcs_file, revision, tags in log:
        # A new group always starts a new section, and a new section
        # always starts a new entry.  Otherwise a new entry starts when
        # the author or description changes or the commit times are 3 or
        # more seconds apart.  The date test is evaluated last, so it is
        # never reached while last_date is still None.
        group_changed = group != last_group
        section_changed = group_changed or section != last_section
        entry_changed = (section_changed
                         or author != last_author
                         or description != last_description
                         or abs(RCSDateToSeconds(date) - RCSDateToSeconds(last_date)) >= 3)
        if entry_changed and rev_files != []:
            # Emit the entry accumulated so far, using the prefix of the
            # section it belongs to.
            out(ChangeLogEntry(cvs_command, last_author, last_date, rev_files, last_description, prefix, indent, length, patches, text_option)[:-1] + '\n')
            rev_files = []
        if group_changed:
            if text_option == 0:
                out(ChangeLogGroupHeader(group, last_group, version) + '\n')
            else:
                out('\n****** ' + group + ' ******' + '\n')
            last_group = group
            last_section = None
        if section_changed:
            (header, prefix) = ChangeLogSectionHeader(section, last_section, version)
            if header is not None:
                out(header + '\n')
            last_section = section
        if entry_changed:
            last_author = author
            last_date = date
            last_description = description
        # Accumulate the revisions and RCS files.
        rev_files.append((revision, rcs_file))

    # print the last entry if there is one (i.e. the last two entries
    # have the same author and date)
    if rev_files != []:
        out(ChangeLogEntry(cvs_command, last_author, last_date, rev_files, last_description, prefix, indent, length, patches, text_option)[:-1] + '\n')
    # Print the HTML trailer.
    if text_option == 0:
        out("\n</pre></body></html>" + '\n')
520
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()
523