xref: /openbsd/gnu/usr.bin/cvs/contrib/rcs2log.sh (revision 09467b48)
1#! /bin/sh
2
3# RCS to ChangeLog generator
4
5# Generate a change log prefix from RCS files (perhaps in the CVS repository)
6# and the ChangeLog (if any).
7# Output the new prefix to standard output.
8# You can edit this prefix by hand, and then prepend it to ChangeLog.
9
10# Ignore log entries that start with `#'.
11# Clump together log entries that start with `{topic} ',
12# where `topic' contains neither white space nor `}'.
13
14Help='The default FILEs are the files registered under the working directory.
15Options:
16
17  -c CHANGELOG  Output a change log prefix to CHANGELOG (default ChangeLog).
18  -h HOSTNAME  Use HOSTNAME in change log entries (default current host).
19  -i INDENT  Indent change log lines by INDENT spaces (default 8).
20  -l LENGTH  Try to limit log lines to LENGTH characters (default 79).
21  -R  If no FILEs are given and RCS is used, recurse through working directory.
22  -r OPTION  Pass OPTION to subsidiary log command.
23  -t TABWIDTH  Tab stops are every TABWIDTH characters (default 8).
24  -u "LOGIN<tab>FULLNAME<tab>MAILADDR"  Assume LOGIN has FULLNAME and MAILADDR.
25  -v  Append RCS revision to file names in log lines.
26  --help  Output help.
27  --version  Output version number.
28
29Report bugs to <bug-gnu-emacs@gnu.org>.'
30
31Id='$Id: rcs2log.sh,v 1.2 2001/08/07 22:00:56 millert Exp $'
32
33# Copyright 1992, 93, 94, 95, 96, 97, 1998 Free Software Foundation, Inc.
34
35# This program is free software; you can redistribute it and/or modify
36# it under the terms of the GNU General Public License as published by
37# the Free Software Foundation; either version 2, or (at your option)
38# any later version.
39#
40# This program is distributed in the hope that it will be useful,
41# but WITHOUT ANY WARRANTY; without even the implied warranty of
42# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
43# GNU General Public License for more details.
44#
45# You should have received a copy of the GNU General Public License
46# along with this program; see the file COPYING.  If not, write to the
47# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
48# Boston, MA 02111-1307, USA.
49
# Legal notice printed by --version.
Copyright='Copyright 1998 Free Software Foundation, Inc.
This program comes with NO WARRANTY, to the extent permitted by law.
You may redistribute copies of this program
under the terms of the GNU General Public License.
For more information about these matters, see the files named COPYING.
Author: Paul Eggert <eggert@twinsun.com>'

# A literal tab and a literal newline, used in patterns and as separators.
tab=`printf '\t'`
nl='
'

# Parse options.

# Defaults.  AWK and TMPDIR keep whatever value the environment already
# gives them (even an empty one); they are only filled in when unset.
AWK=${AWK-awk}
TMPDIR=${TMPDIR-/tmp}

# Change log file name.
changelog=ChangeLog

# Layout of the generated entries: indent of a log line, suggested maximum
# width of a log line, and width of a horizontal tab.
indent=8
length=79
tabwidth=8

# rlog date option, and other options to pass to rlog.
datearg=
rlog_options=

# Name of local host (if empty, will deduce it later).
hostname=

# Login names for people we know fullnames and mailaddrs of, and the
# corresponding login<tab>fullname<tab>mailaddr triplets.
logins=
loginFullnameMailaddrs=

# Time zone for log dates (if empty, use local time).
logTZ=

# Flags: t if we want recursive rlog; t if we want revision numbers.
recursive=
revision=
78
# Parse the command-line options, leaving only the FILE operands in "$@".
#
# Besides the simple value options, -n and -u accumulate two lists:
#   loginFullnameMailaddrs  newline-separated login/fullname/mailaddr triplets
#   logins                  newline-separated login names of those triplets
# The login name is taken from the option operand itself, so that `logins'
# really lists the people whose names were supplied on the command line.
# --version and --help print to standard error and exit; any other
# unrecognized option prints the usage message and exits with status 1.
while :
do
	case $1 in
	-c)	changelog=${2?}; shift;;
	-i)	indent=${2?}; shift;;
	-h)	hostname=${2?}; shift;;
	-l)	length=${2?}; shift;;
	-[nu])	# -n is obsolescent; it is replaced by -u.
		case $1 in
		-n)	case ${2?}${3?}${4?} in
			*"$tab"* | *"$nl"*)
				echo >&2 "$0: -n '$2' '$3' '$4': tabs, newlines not allowed"
				exit 1
			esac
			# Remember the login before its operands are shifted away.
			login=$2
			case $loginFullnameMailaddrs in
			'') loginFullnameMailaddrs=$2$tab$3$tab$4;;
			?*) loginFullnameMailaddrs=$loginFullnameMailaddrs$nl$2$tab$3$tab$4
			esac
			shift; shift; shift;;
		-u)
			# If $2 is not tab-separated, use colon for separator.
			case ${2?} in
			*"$nl"*)
				echo >&2 "$0: -u '$2': newlines not allowed"
				exit 1;;
			*"$tab"*)
				t=$tab;;
			*)
				t=:
			esac
			case $2 in
			*"$t"*"$t"*"$t"*)
				echo >&2 "$0: -u '$2': too many fields"
				exit 1;;
			*"$t"*"$t"*)
				;;
			*)
				echo >&2 "$0: -u '$2': not enough fields"
				exit 1
			esac
			# The login is everything before the first separator.
			login=${2%%"$t"*}
			case $loginFullnameMailaddrs in
			'') loginFullnameMailaddrs=$2;;
			?*) loginFullnameMailaddrs=$loginFullnameMailaddrs$nl$2
			esac
			shift
		esac
		case $logins in
		'') logins=$login;;
		?*) logins=$logins$nl$login
		esac
		;;
	-r)
		case $rlog_options in
		'') rlog_options=${2?};;
		?*) rlog_options=$rlog_options$nl${2?}
		esac
		shift;;
	-R)	recursive=t;;
	-t)	tabwidth=${2?}; shift;;
	-v)	revision=t;;
	--version)
		# The third word of the RCS Id string is the revision number.
		set $Id
		rcs2logVersion=$3
		echo >&2 "rcs2log (GNU Emacs) $rcs2logVersion$nl$Copyright"
		exit 0;;
	-*)	echo >&2 "Usage: $0 [OPTION]... [FILE ...]$nl$Help"
		case $1 in
		--help) exit 0;;
		*) exit 1
		esac;;
	*)	break
	esac
	shift
done
153
# awk statements that fill m[0..11] with the English month abbreviations.
month_data='
	m[0]="Jan"; m[1]="Feb"; m[2]="Mar"
	m[3]="Apr"; m[4]="May"; m[5]="Jun"
	m[6]="Jul"; m[7]="Aug"; m[8]="Sep"
	m[9]="Oct"; m[10]="Nov"; m[11]="Dec"
'


# Put rlog output into $rlogout.

# When the user passed no rlog options and a non-empty ChangeLog exists,
# restrict the log to revisions checked in after the date of the first
# ChangeLog entry.  ChangeLog is only by date, so some of those revisions may
# already be in ChangeLog; removing the duplicates is up to the user.
first_date_awk='
	/^[0-9]+-[0-9][0-9]-[0-9][0-9]/ {
		# The entry header already starts with an ISO 8601 date.
		print $1
		exit
	}
	/^... ... [ 0-9][0-9] [ 0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9]+ / {
		# Old-fashioned date and time (Emacs 19.31 and earlier):
		# translate the month name to a number and print an ISO date.
		'"$month_data"'
		month = 0
		while (month <= 11 && m[month] != $2) month++
		printf "%d-%02d-%02d\n", $5, month + 1, $3
		exit
	}
'
if test -z "$rlog_options" && test -s "$changelog"
then
	first_date=`$AWK "$first_date_awk" <"$changelog"` || exit
	if test -n "$first_date"
	then
		datearg="-d>$first_date"
	fi
fi
194
# Honor a change-log-time-zone-rule local variable found in the last lines
# of ChangeLog: a quoted value is used verbatim as TZ, and the value t
# selects UTC0.  Only the first matching line is used.
if test -s "$changelog"
then
	blanks='['"$tab"' ]*'
	tz_rule='^.*change-log-time-zone-rule'"$blanks"':'"$blanks"
	tz_script='
		/'"$tz_rule"'"\([^"]*\)".*/{
			s//\1/; p; q
		}
		/'"$tz_rule"'t.*/{
			s//UTC0/; p; q
		}
	'
	logTZ=`tail "$changelog" | sed -n "$tz_script"`
	if test -n "$logTZ"
	then
		TZ=$logTZ
		export TZ
	fi
fi
211
# Choose the log command.  A CVS working directory (one with CVS/Repository)
# is logged with `cvs -q log' against its repository; anything else is
# logged with plain rlog and an empty $repository.
if test -f CVS/Repository
then
	rlog='cvs -q log'
	repository=`sed 1q <CVS/Repository` || exit
	# CVS/Root, when present, overrides any CVSROOT from the environment.
	if test -f CVS/Root
	then
		CVSROOT=`cat <CVS/Root` || exit
	fi
	case $CVSROOT in
	*:/*)
		# Remote repository: nothing to check locally.
		;;
	*)
		# Local repository: make the path absolute and insist it exists.
		case $repository in
		/*) ;;
		*) repository=${CVSROOT?}/$repository
		esac
		test -d "$repository" || {
			echo >&2 "$0: $repository: bad repository (see CVS/Repository)"
			exit 1
		}
	esac
else
	rlog=rlog
	repository=
fi
238
# Ask $rlog whether it knows -zLT.  If its reply does not contain the text
# " option", put -zLT in front of the other rlog options.
zlt_reply=`$rlog -zLT 2>&1`
case $zlt_reply in
*' option'*)
	;;
*)
	rlog_options=-zLT${rlog_options:+$nl$rlog_options}
esac
248
# With no arguments, examine all files under the RCS directory.
# This applies only when no FILE operand is left and $repository is empty
# (i.e. CVS is not in use).  The resulting names become the positional
# parameters; if the non-recursive scan finds nothing, the script exits 0.
case $# in
0)
	case $repository in
	'')
		# Split only on newlines below, so that names containing blanks
		# survive the unquoted expansions of $RCSdirs and $files.
		oldIFS=$IFS
		IFS=$nl
		case $recursive in
		t)
			# -R: list the regular files in every RCS directory (minus the
			# excluded name patterns) plus every *,v file anywhere below
			# ".", then map each RCS file name to a working file name by
			# dropping ",v", the "/RCS/" component and a leading "./".
			RCSdirs=`find . -name RCS -type d -print`
			filesFromRCSfiles='s|,v$||; s|/RCS/|/|; s|^\./||'
			files=`
				{
					case $RCSdirs in
					?*) find $RCSdirs \
							-type f \
							! -name '*_' \
							! -name ',*,' \
							! -name '.*_' \
							! -name .rcsfreeze.log \
							! -name .rcsfreeze.ver \
							-print
					esac
					find . -name '*,v' -print
				} |
				sort -u |
				sed "$filesFromRCSfiles"
			`;;
		*)
			# Non-recursive: look only at ./RCS and at *,v files in ".".
			files=
			for file in RCS/.* RCS/* .*,v *,v
			do
				case $file in
				# Skip ".", "..", and the excluded RCS names
				# (presumably RCS temporary/lock files - TODO confirm).
				RCS/. | RCS/.. | RCS/,*, | RCS/*_) continue;;
				RCS/.rcsfreeze.log | RCS/.rcsfreeze.ver) continue;;
				# A glob that matched nothing is left as the literal
				# pattern; keep it only if such a file really exists.
				RCS/.\* | RCS/\* | .\*,v | \*,v) test -f "$file" || continue;;
				RCS/*,v | RCS/.*,v) ;;
				RCS/* | RCS/.*) test -f "$file" || continue
				esac
				case $files in
				'') files=$file;;
				?*) files=$files$nl$file
				esac
			done
			case $files in
			'') exit 0
			esac
		esac
		# "set x ...; shift" installs the list as "$@" even when it is
		# empty or its first name starts with "-".
		set x $files
		shift
		IFS=$oldIFS
	esac
esac
302
# Create the two scratch files and collect the raw log output.
#
#   llogout  scratch file for login/author lists
#   rlogout  output of the log command
#
# The directory comes from TMPDIR, falling back to /tmp when TMPDIR is unset
# or empty.  All uses of the names are quoted so that a TMPDIR containing
# blanks works.  The exit trap removes both files and forces exit status 1;
# a successful run must therefore leave by some other route than `exit'.
llogout=`mktemp "${TMPDIR:-/tmp}/rcs2log_l.XXXXXXXXXX"` || exit 1
rlogout=`mktemp "${TMPDIR:-/tmp}/rcs2log_r.XXXXXXXXXX"` || {
	rm -f "$llogout"
	exit 1
}
# On HUP, INT, PIPE and TERM just exit, which in turn runs the exit trap.
trap exit 1 2 13 15
# Single quotes: the names are expanded (and quoted) when the trap runs.
trap 'rm -f "$llogout" "$rlogout"; exit 1' 0

# Run the log command on the FILE operands.  $rlog and $rlog_options are
# deliberately unquoted: each holds several words.
case $datearg in
?*) $rlog $rlog_options "$datearg" ${1+"$@"} >"$rlogout";;
'') $rlog $rlog_options ${1+"$@"} >"$rlogout"
esac || exit
315
316
# Build initialize_fullname and initialize_mailaddr: awk statements that
# fill the `fullname' and `mailaddr' associative arrays from the
# login/fullname/mailaddr triplets given with -n and -u.
# Warning: foreign authors (i.e. not known in the passwd file) are mishandled;
# you have to fix the resulting output by hand.

initialize_fullname=
initialize_mailaddr=

if test -n "$loginFullnameMailaddrs"
then
	# The values end up inside awk string literals, so backslash-escape
	# any double quotes and backslashes first (via the scratch file).
	case $loginFullnameMailaddrs in
	*\\* | *\"*)
		sed 's/["\\]/\\&/g' >$llogout <<EOF || exit
$loginFullnameMailaddrs
EOF
		loginFullnameMailaddrs=`cat $llogout`
	esac

	# One triplet per line; each is split on tabs if it contains a tab,
	# otherwise on colons.
	saved_IFS=$IFS
	IFS=$nl
	for triplet in $loginFullnameMailaddrs
	do
		case $triplet in
		*"$tab"*) IFS=$tab;;
		*) IFS=:
		esac
		set x $triplet
		login=$2 fullname=$3 mailaddr=$4
		initialize_fullname=$initialize_fullname$nl$tab$tab$tab"fullname[\"$login\"] = \"$fullname\""
		initialize_mailaddr=$initialize_mailaddr$nl$tab$tab$tab"mailaddr[\"$login\"] = \"$mailaddr\""
	done
	IFS=$saved_IFS
fi
354
# Put the sorted, de-duplicated list of already-known logins in the scratch
# file.
if test -n "$llogout"
then
	sort -u -o $llogout <<EOF || exit
$logins
EOF
fi

# awk program: for each well-formed "date: ...;  author: NAME;" line of the
# log output, print NAME without its trailing semicolon.
author_filter='/^date: / && $2 ~ /^[0-9]*[-\/][0-9][0-9][-\/][0-9][0-9]$/ && $3 ~ /^[0-9][0-9]:[0-9][0-9]:[0-9][0-9][-+0-9:]*;$/ && $4 == "author:" && $5 ~ /^[^;]*;$/ {
	print substr($5, 1, length($5) - 1)
}'

# Authors mentioned in the log, minus the logins listed in the scratch file.
authors=`
	$AWK "$author_filter" <$rlogout |
	sort -u |
	if test -n "$llogout"
	then comm -23 - $llogout
	else cat
	fi
`
# For the authors left in $authors, look their full names up in the password
# database and put the resulting awk assignments at the front of
# initialize_fullname.
case $authors in
?*)
	# Save the author list in the scratch file, one login per line.
	cat >$llogout <<EOF || exit
$authors
EOF
	# Turn each login into the awk statement  author["LOGIN"] = 1,
	# escaping double quotes and backslashes first.
	initialize_author_script='s/["\\]/\\&/g; s/.*/author[\"&\"] = 1/'
	initialize_author=`sed -e "$initialize_author_script" <$llogout`
	# awk program run over colon-separated passwd entries.  For each entry
	# whose login ($1) is still wanted it tidies field 5, prints
	#	fullname["LOGIN"] = "NAME"
	# and then clears author[LOGIN], so only the first entry per login counts.
	awkscript='
		BEGIN {
			alphabet = "abcdefghijklmnopqrstuvwxyz"
			ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
			'"$initialize_author"'
		}
		{
			if (author[$1]) {
				fullname = $5
				if (fullname ~ /[0-9]+-[^(]*\([0-9]+\)$/) {
					# Remove the junk from fullnames like "0000-Admin(0000)".
					fullname = substr(fullname, index(fullname, "-") + 1)
					fullname = substr(fullname, 1, index(fullname, "(") - 1)
				}
				if (fullname ~ /,[^ ]/) {
					# Some sites put comma-separated junk after the fullname.
					# Remove it, but leave "Bill Gates, Jr" alone.
					fullname = substr(fullname, 1, index(fullname, ",") - 1)
				}
				abbr = index(fullname, "&")
				if (abbr) {
					a = substr($1, 1, 1)
					A = a
					i = index(alphabet, a)
					if (i) A = substr(ALPHABET, i, 1)
					fullname = substr(fullname, 1, abbr-1) A substr($1, 2) substr(fullname, abbr+1)
				}

				# Quote quotes and backslashes properly in full names.
				# Do not use gsub; traditional awk lacks it.
				quoted = ""
				rest = fullname
				for (;;) {
					p = index(rest, "\\")
					q = index(rest, "\"")
					if (p) {
						if (q && q<p) p = q
					} else {
						if (!q) break
						p = q
					}
					quoted = quoted substr(rest, 1, p-1) "\\" substr(rest, p, 1)
					rest = substr(rest, p+1)
				}

				printf "fullname[\"%s\"] = \"%s%s\"\n", $1, quoted, rest
				author[$1] = 0
			}
		}
	'

	# Try getent first; if it fails, fall back to /etc/passwd followed by
	# nismatch and ypmatch lookups.  Error output of all of them is
	# discarded.  The assignments already in initialize_fullname (from -n
	# and -u) are appended after the looked-up ones, so awk executes them
	# later and they take precedence.
	initialize_fullname=`
		{
			(getent passwd $authors) ||
			(
				cat /etc/passwd
				for author in $authors
				do NIS_PATH= nismatch $author passwd.org_dir
				done
				ypmatch $authors passwd
			)
		} 2>/dev/null |
		$AWK -F: "$awkscript"
	`$initialize_fullname
esac
444
445
# Function to print a single log line.
# We don't use awk functions, to stay compatible with old awk versions.
# `Log' is the log message (with \n replaced by \001).
# `files' contains the affected files.
# This is an awk statement block that gets spliced into the final awk
# program; besides `Log' and `files' it reads the awk variables `SOH' and
# `indent_string', and it modifies `Log', `files', `sep', `i' and `logline'.
# The values of $length, $indent and $tab are substituted into the text
# here, when the shell variable is assigned.
printlogline='{

	# Following the GNU coding standards, rewrite
	#	* file: (function): comment
	# to
	#	* file (function): comment
	if (Log ~ /^\([^)]*\): /) {
		i = index(Log, ")")
		files = files " " substr(Log, 1, i)
		Log = substr(Log, i+3)
	}

	# If "label: comment" is too long, break the line after the ":".
	sep = " "
	if ('"$length"' <= '"$indent"' + 1 + length(files) + index(Log, SOH)) sep = "\n" indent_string

	# Print the label.
	printf "%s*%s:", indent_string, files

	# Print each line of the log, transliterating \001 to \n.
	while ((i = index(Log, SOH)) != 0) {
		logline = substr(Log, 1, i-1)
		if (logline ~ /[^'"$tab"' ]/) {
			printf "%s%s\n", sep, logline
		} else {
			print ""
		}
		sep = indent_string
		Log = substr(Log, i+1)
	}
}'

# Pattern to match the `revision' line of rlog output.
# It accepts "revision N.N" with any number of further ".N.N" pairs,
# optionally followed by a "locked by: NAME;" clause and trailing blanks.
rlog_revision_pattern='^revision [0-9]+\.[0-9]+(\.[0-9]+\.[0-9]+)*(['"$tab"' ]+locked by: [^'"$tab"' $,.0-9:;@]*[^'"$tab"' $,:;@][^'"$tab"' $,.0-9:;@]*;)?['"$tab"' ]*$'
484
# Unless -h supplied one, deduce the host name: try hostname, uname -n,
# uuname -l and /etc/whoami in turn, and give up if all of them fail.
# A name without a dot gets the output of domainname appended, provided
# that command succeeds and its output contains a dot.
if test -z "$hostname"
then
	if hostname=`(hostname || uname -n || uuname -l || cat /etc/whoami) 2>/dev/null`
	then
		:
	else
		echo >&2 "$0: cannot deduce hostname"
		exit 1
	fi

	case $hostname in
	*.*)
		;;
	*)
		if domainname=`(domainname) 2>/dev/null`
		then
			case $domainname in
			*.*) hostname=$hostname.$domainname
			esac
		fi
	esac
fi
503
504
# Process the rlog output, generating ChangeLog style entries.
#
# This is one pipeline of three stages (awk | sort | awk).  It reads
# $rlogout and writes the ChangeLog prefix to standard output.  On success
# the shell is replaced by the final rm, so the script leaves with rm's exit
# status.
#
# Shell values spliced into the awk programs: $repository,
# $rlog_revision_pattern, $logTZ, $revision, $initialize_fullname,
# $initialize_mailaddr, $indent, $tabwidth, $tab, $hostname and
# $printlogline.

# First, reformat the rlog output so that each line contains one log entry.
# Transliterate \n to \001 so that multiline entries fit on a single line.
# Discard irrelevant rlog output.
$AWK <"$rlogout" '
	BEGIN { repository = "'"$repository"'" }
	/^RCS file:/ {
		if (repository != "") {
			# CVS: derive the working file name from the RCS file name by
			# dropping the repository prefix, the ",v" suffix and a final
			# "Attic/" directory component.
			filename = $3
			if (substr(filename, 1, length(repository) + 1) == repository "/") {
				filename = substr(filename, length(repository) + 2)
			}
			if (filename ~ /,v$/) {
				filename = substr(filename, 1, length(filename) - 2)
			}
			if (filename ~ /(^|\/)Attic\/[^\/]*$/) {
				i = length(filename)
				while (substr(filename, i, 1) != "/") i--
				filename = substr(filename, 1, i - 6) substr(filename, i + 1)
			}
		}
		rev = "?"
	}
	/^Working file:/ { if (repository == "") filename = $3 }
	/'"$rlog_revision_pattern"'/, /^(-----------*|===========*)$/ {
		line = $0
		if (line ~ /'"$rlog_revision_pattern"'/) {
			rev = $2
			next
		}
		if (line ~ /^date: [0-9][- +\/0-9:]*;/) {
			date = $2
			if (date ~ /\//) {
				# This is a traditional RCS format date YYYY/MM/DD.
				# Replace "/"s with "-"s to get ISO format.
				newdate = ""
				while ((i = index(date, "/")) != 0) {
					newdate = newdate substr(date, 1, i-1) "-"
					date = substr(date, i+1)
				}
				date = newdate date
			}
			time = substr($3, 1, length($3) - 1)
			author = substr($5, 1, length($5)-1)
			printf "%s %s %s %s %s %c", filename, rev, date, time, author, 1
			rev = "?"
			next
		}
		if (line ~ /^branches: /) { next }
		if (line ~ /^(-----------*|===========*)$/) { print ""; next }
		if (line == "Initial revision" || line ~ /^file .+ was initially added on branch .+\.$/) {
			line = "New file."
		}
		printf "%s%c", line, 1
	}
' |

# Now each line is of the form
# FILENAME REVISION YYYY-MM-DD HH:MM:SS[+-TIMEZONE] AUTHOR \001LOG
#	where \001 stands for a newline,
#	and each line of the log is terminated by \001 instead of \n.
# Sort the log entries, first by date+time (in reverse order),
# then by author, then by log entry, and finally by file name and revision
# (just in case).
# The keys are written with -k; they are the equivalent of the obsolete
# "+2 -4r +4 +0" form, which current sort implementations reject.
sort -k 3,4r -k 5 -k 1 |

# Finally, reformat the sorted log entries.
$AWK '
	BEGIN {
		logTZ = "'"$logTZ"'"
		revision = "'"$revision"'"

		# Some awk variants do not understand "\001", and a raw control
		# character in this file is easily lost, so build SOH (octal code
		# 001) the same way the first stage emits it.
		SOH = sprintf("%c", 1)

		# Initialize the fullname and mailaddr associative arrays.
		'"$initialize_fullname"'
		'"$initialize_mailaddr"'

		# Initialize indent string.
		indent_string = ""
		i = '"$indent"'
		if (0 < '"$tabwidth"')
			for (;  '"$tabwidth"' <= i;  i -= '"$tabwidth"')
				indent_string = indent_string "\t"
		while (1 <= i--)
			indent_string = indent_string " "
	}

	{
		newlog = substr($0, 1 + index($0, SOH))

		# Ignore log entries prefixed by "#".
		if (newlog ~ /^#/) { next }

		if (Log != newlog || date != $3 || author != $5) {

			# The previous log and this log differ.

			# Print the old log.
			if (date != "") '"$printlogline"'

			# Logs that begin with "{clumpname} " should be grouped together,
			# and the clumpname should be removed.
			# Extract the new clumpname from the log header,
			# and use it to decide whether to output a blank line.
			newclumpname = ""
			sep = "\n"
			if (date == "") sep = ""
			if (newlog ~ /^\{[^'"$tab"' }]*}['"$tab"' ]/) {
				i = index(newlog, "}")
				newclumpname = substr(newlog, 1, i)
				while (substr(newlog, i+1) ~ /^['"$tab"' ]/) i++
				newlog = substr(newlog, i+1)
				if (clumpname == newclumpname) sep = ""
			}
			# sep is data, not a format, so print it through "%s".
			printf "%s", sep
			clumpname = newclumpname

			# Get ready for the next log.
			Log = newlog
			if (files != "")
				for (i in filesknown)
					filesknown[i] = 0
			files = ""
		}
		if (date != $3  ||  author != $5) {
			# The previous date+author and this date+author differ.
			# Print the new one.
			date = $3
			time = $4
			author = $5

			zone = ""
			if (logTZ && ((i = index(time, "-")) || (i = index(time, "+"))))
				zone = " " substr(time, i)

			# Print "date[ timezone]  fullname  <email address>".
			# Get fullname and email address from associative arrays;
			# default to author and author@hostname if not in arrays.
			if (fullname[author])
				auth = fullname[author]
			else
				auth = author
			printf "%s%s  %s  ", date, zone, auth
			if (mailaddr[author])
				printf "<%s>\n\n", mailaddr[author]
			else
				printf "<%s@%s>\n\n", author, "'"$hostname"'"
		}
		if (! filesknown[$1]) {
			filesknown[$1] = 1
			if (files == "") files = " " $1
			else files = files ", " $1
			if (revision && $2 != "?") files = files " " $2
		}
	}
	END {
		# Print the last log.
		if (date != "") {
			'"$printlogline"'
			printf "\n"
		}
	}
' &&


# Exit successfully.
# exec replaces the shell, so no exit trap of this shell runs afterwards and
# the exit status is that of rm.

exec rm -f "$llogout" "$rlogout"
677
678# Local Variables:
679# tab-width:4
680# End:
681