1#!/usr/bin/python
2
3# Copyright (C) 2017 Free Software Foundation, Inc.
4#
5# This file is part of GCC.
6#
7# GCC is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 3, or (at your option)
10# any later version.
11#
12# GCC is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with GCC; see the file COPYING.  If not, write to
19# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20# Boston, MA 02110-1301, USA.
21
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it (with -i, the entry is
# prepended to the patch). It does not try to be too smart when parsing
# function names, but it produces a reasonable approximation.
#
# This is a straightforward adaptation of the original Perl script.
28#
29# Author: Yury Gribov <tetra2005@gmail.com>
30
31import sys
32import re
33import os.path
34import os
35import getopt
36import tempfile
37import time
38import shutil
39from subprocess import Popen, PIPE
40
41me = os.path.basename(sys.argv[0])
42
43def error(msg):
44  sys.stderr.write("%s: error: %s\n" % (me, msg))
45  sys.exit(1)
46
47def warn(msg):
48  sys.stderr.write("%s: warning: %s\n" % (me, msg))
49
50class RegexCache(object):
51  """Simple trick to Perl-like combined match-and-bind."""
52
53  def __init__(self):
54    self.last_match = None
55
56  def match(self, p, s):
57    self.last_match = re.match(p, s) if isinstance(p, str) else p.match(s)
58    return self.last_match
59
60  def search(self, p, s):
61    self.last_match = re.search(p, s) if isinstance(p, str) else p.search(s)
62    return self.last_match
63
64  def group(self, n):
65    return self.last_match.group(n)
66
67cache = RegexCache()
68
def print_help_and_exit():
  """Print the usage message to stdout and exit with status 1."""
  usage = """\
Usage: %s [-i | --inline] [PATCH]
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in Subversion/git).

When PATCH is - or missing, read standard input.

When -i is used, prepends ChangeLog to PATCH.
If PATCH is not stdin, modifies PATCH in-place, otherwise writes
to stdout.
""" % me
  # sys.stdout.write() is spelled the same in Python 2 and 3, unlike the
  # print statement.  The extra newline leaves a blank line after the text.
  sys.stdout.write(usage + "\n")
  sys.exit(1)
83
def run(cmd, die_on_error):
  """Simple wrapper for Popen.

  CMD is a command line that is split on single spaces and executed
  without a shell.  Returns a (returncode, stdout, stderr) tuple holding
  the exit status and the captured output.  If DIE_ON_ERROR is true and
  the command exits with a non-zero status, the failure is reported
  through error() together with the captured stderr.
  """
  proc = Popen(cmd.split(' '), stderr = PIPE, stdout = PIPE)
  (out, err) = proc.communicate()
  if die_on_error and proc.returncode != 0:
    # One placeholder per argument, and the captured stderr text rather
    # than the (already drained) pipe object.
    msg = "`%s` failed:\n%s" % (cmd, err)
    error(msg)
  return proc.returncode, out, err
91
def read_user_info():
  """Return the (name, email) pair to put in ChangeLog entry headers.

  If ~/.mklog exists it is the only source consulted and must define
  both NAME and EMAIL.  Otherwise the values are taken from
  `git config user.name` and `git config user.email`.  Missing values
  are reported through error().
  """
  dot_mklog_format_msg = """\
The .mklog format is:
NAME = ...
EMAIL = ...
"""

  # First try to read .mklog config
  mklog_conf = os.path.expanduser('~/.mklog')
  if os.path.exists(mklog_conf):
    attrs = {}
    f = open(mklog_conf, 'rb')
    try:
      for s in f:
        # Lines that are not of the form KEY = VALUE are ignored.
        if cache.match(r'^\s*([a-zA-Z0-9_]+)\s*=\s*(.*?)\s*$', s):
          attrs[cache.group(1)] = cache.group(2)
    finally:
      # Release the handle even if reading fails part-way.
      f.close()
    for key in ('NAME', 'EMAIL'):
      if key not in attrs:
        # Show the expected file format along with the complaint.
        error("'%s' not present in .mklog\n%s" % (key, dot_mklog_format_msg))
    return attrs['NAME'], attrs['EMAIL']

  # Otherwise go with git

  rc1, name, _ = run('git config user.name', False)
  rc2, email, _ = run('git config user.email', False)

  if rc1 != 0 or rc2 != 0:
    error("""\
Could not read git user.name and user.email settings.
Please add missing git settings, or create a %s.
""" % mklog_conf)

  # git terminates each value with a newline.
  return name.rstrip(), email.rstrip()
128
def get_parent_changelog(s):
  """See which ChangeLog this file change should go to.

  S is the path of a changed file as it appears in the patch.  Returns
  a (changelog_name, relative_name) pair:
    * a name without any directory part maps to ("ChangeLog", S);
    * otherwise S and then each of its parent directories D is tried,
      and the first D for which D/ChangeLog exists (relative to the
      directory above the one holding this script, or to the current
      directory) yields (D/ChangeLog, S relative to D);
    * if none is found the result is ("Unknown ChangeLog", S).
  """

  if s.find('\\') == -1 and s.find('/') == -1:
    return "ChangeLog", s

  gcc_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

  d = s
  while d:
    clname = d + "/ChangeLog"
    if os.path.exists(gcc_root + '/' + clname) or os.path.exists(clname):
      relname = s[len(d)+1:]
      return clname, relname
    parent = os.path.split(d)[0]
    if parent == d:
      # os.path.split() maps a root such as '/' to itself; without this
      # check an absolute path with no ChangeLog would loop forever.
      break
    d = parent

  return "Unknown ChangeLog", s
146
class FileDiff:
  """Class to represent changes in a single file."""

  def __init__(self, filename):
    # Path of the changed file as taken from the diff header.
    self.filename = filename
    # Hunk objects belonging to this file.
    self.hunks = []
    # ChangeLog selected for the file, and the file name to use in it.
    self.clname, self.relname = get_parent_changelog(filename)

  def dump(self):
    """Write a description of this diff and of all its hunks to stdout."""
    # sys.stdout.write() is used because it is spelled the same in
    # Python 2 and 3; the newline a print statement would add is explicit.
    header = "Diff for %s:\n  ChangeLog = %s\n  rel name = %s\n" % (self.filename, self.clname, self.relname)
    sys.stdout.write(header + "\n")
    for i, h in enumerate(self.hunks):
      sys.stdout.write("Next hunk %d:\n" % i)
      h.dump()
160
class Hunk:
  """Class to represent a single hunk of changes."""

  def __init__(self, hdr):
    # Header line that introduced the hunk.
    self.hdr = hdr
    # Lines of the hunk that follow the header.
    self.lines = []
    # True value if the header is a context-diff hunk separator.
    self.ctx_diff = is_ctx_hunk_start(hdr)

  def dump(self):
    """Write the header and the lines of the hunk to stdout."""
    # sys.stdout.write() is spelled the same in Python 2 and 3.
    sys.stdout.write('%s\n' % self.hdr)
    sys.stdout.write('%s\n' % '\n'.join(self.lines))

  def is_file_addition(self):
    """Does hunk describe addition of file?  Returns a bool."""
    if self.ctx_diff:
      # Context diffs mark an empty old side with a '*** 0 ****' line.
      return any(re.match(r'^\*\*\* 0 \*\*\*\*', line)
                 for line in self.lines)
    return re.match(r'^@@ -0,0 \+1.* @@', self.hdr) is not None

  def is_file_removal(self):
    """Does hunk describe removal of file?  Returns a bool."""
    if self.ctx_diff:
      # Context diffs mark an empty new side with a '--- 0 ----' line.
      return any(re.match(r'^--- 0 ----', line)
                 for line in self.lines)
    return re.match(r'^@@ -1.* \+0,0 @@', self.hdr) is not None
190
def is_file_diff_start(s):
  """Tell whether S is a file header line of a diff ('*** name' or
     '--- name')."""
  # A line that also ends with its marker is a context diff line marker
  # such as
  #   *** 385,391 ****
  # and must not be taken for a file header.
  for marker in ('***', '---'):
    if s.startswith(marker) and not s.endswith(marker):
      return True
  return False
196
def is_ctx_hunk_start(s):
  """Tell whether S opens a hunk of a context diff, i.e. begins with
     at least five asterisks.  Returns the match object or None."""
  separator = re.compile(r'^\*\*\*\*\*\**')
  return separator.match(s)
199
def is_uni_hunk_start(s):
  """Tell whether S is a hunk header of a unified diff ('@@ ... @@').
     Returns the match object or None."""
  header = re.compile(r'^@@ .* @@')
  return header.match(s)
202
def is_hunk_start(s):
  """Tell whether S starts a hunk in either diff format.  Returns the
     match object of the format that matched, or None."""
  ctx = is_ctx_hunk_start(s)
  if ctx:
    return ctx
  return is_uni_hunk_start(s)
205
def remove_suffixes(s):
  """Normalize a file name taken from a diff header: drop one leading
     'a/' or 'b/' and then one trailing '.jj'."""
  name = s
  if name[:2] in ('a/', 'b/'):
    name = name[2:]
  if name[-3:] == '.jj':
    name = name[:-3]
  return name
212
def find_changed_funs(hunk):
  """Find all functions touched by hunk.  We don't try too hard
     to find good matches.  This should return a superset
     of the actual set of functions in the .diff file.

     HUNK provides the header line (hdr), the body lines (lines) and
     the context-diff flag (ctx_diff).  Returns the list of names
     found, without duplicates, in order of first appearance.
  """

  fns = []
  fn = None

  # The text after the hunk header markers, if it starts with an
  # identifier character, is the first candidate.
  if (cache.match(r'^\*\*\*\*\*\** ([a-zA-Z0-9_].*)', hunk.hdr)
      or cache.match(r'^@@ .* @@ ([a-zA-Z0-9_].*)', hunk.hdr)):
    fn = cache.group(1)

  for line in hunk.lines:
    # Context diffs have extra whitespace after first char;
    # remove it to make matching easier.
    if hunk.ctx_diff:
      line = re.sub(r'^([-+! ]) ', r'\1', line)

    # Remember most recent identifier in hunk
    # that might be a function name.
    if cache.match(r'^[-+! ]([a-zA-Z0-9_#].*)', line):
      fn = cache.group(1)

    # A changed line starts with a change marker that is not followed
    # by '-'.
    change = line and re.match(r'^[-+!][^-]', line)

    # Top-level comment cannot belong to function
    if re.match(r'^[-+! ]\/\*', line):
      fn = None

    if change and fn:
      if cache.match(r'^((class|struct|union|enum)\s+[a-zA-Z0-9_]+)', fn):
        # Struct declaration
        fn = cache.group(1)
      elif cache.search(r'#\s*define\s+([a-zA-Z0-9_]+)', fn):
        # Macro definition
        fn = cache.group(1)
      elif cache.match(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)', fn):
        # Supermacro
        fn = cache.group(1)
      elif cache.search(r'([a-zA-Z_][^()\s]*)\s*\([^*]', fn):
        # Discard template and function parameters.
        fn = cache.group(1)
        fn = re.sub(r'<[^<>]*>', '', fn)
        fn = fn.rstrip()
      else:
        fn = None

      if fn and fn not in fns:  # Avoid dups
        fns.append(fn)

      # The candidate has been consumed; later changes need a new one.
      fn = None

  return fns
267
def parse_patch(contents):
  """Parse patch contents to a sequence of FileDiffs.

  CONTENTS is the whole patch as a single string.  Every pair of file
  header lines starts a new FileDiff; the hunks that follow, up to the
  next file header or a line starting with 'diff ' or 'Index: ', are
  attached to it.  A file without hunks gets an empty hunk list.
  Malformed headers are reported through error().
  """

  diffs = []

  lines = contents.split('\n')
  nlines = len(lines)

  i = 0
  while i < nlines:
    line = lines[i]

    # Diff headers look like
    #   --- a/gcc/tree.c
    #   +++ b/gcc/tree.c
    # or
    #   *** gcc/cfgexpand.c     2013-12-25 20:07:24.800350058 +0400
    #   --- gcc/cfgexpand.c     2013-12-25 20:06:30.612350178 +0400

    fields = []
    if is_file_diff_start(line):
      fields = re.split(r'\s+', line)
    if len(fields) < 2:
      # Not a header, or a marker with no file name field after it.
      i += 1
      continue

    left = remove_suffixes(fields[1])

    i += 1
    # The first header line may be the last line of the input, so check
    # the bound before looking at the second one.  Line numbers in the
    # message are 1-based.
    if i >= nlines or not cache.match(r'^[+-][+-][+-] +(\S+)', lines[i]):
      error("expected filename in line %d" % (i + 1))
    right = remove_suffixes(cache.group(1))

    # Extract real file name from left and right names.
    filename = None
    if left == right:
      filename = left
    elif left == '/dev/null':
      filename = right
    elif right == '/dev/null':
      filename = left
    else:
      # Use the longest common run of trailing path components.  The
      # walk works on copies so that the error below can show the
      # complete names.
      comps = []
      lrest, rrest = left, right
      while lrest and rrest:
        lrest, l = os.path.split(lrest)
        rrest, r = os.path.split(rrest)
        if l != r:
          break
        comps.append(l)

      if not comps:
        error("failed to extract common name for %s and %s" % (left, right))

      comps.reverse()
      filename = '/'.join(comps)

    d = FileDiff(filename)
    diffs.append(d)

    # Collect hunks for current file.
    hunk = None
    i += 1
    while i < nlines:
      line = lines[i]

      # Create new hunk when we see hunk header
      if is_hunk_start(line):
        if hunk is not None:
          d.hunks.append(hunk)
        hunk = Hunk(line)
        i += 1
        continue

      # Stop when we reach next diff.  The index is left on this line
      # so that the outer loop examines it next.
      if (is_file_diff_start(line)
          or line.startswith('diff ')
          or line.startswith('Index: ')):
        break

      # Lines seen before the first hunk header belong to no hunk.
      if hunk is not None:
        hunk.lines.append(line)
      i += 1

    # A file header may be followed by no hunk at all; do not record a
    # placeholder in that case.
    if hunk is not None:
      d.hunks.append(hunk)

  return diffs
355
def main():
  """Generate ChangeLog templates for the patch named on the command line.

  Options: -h/--help prints usage and exits, -v/--verbose also dumps
  the parsed diffs, -i/--inline writes the patch after the generated
  entries and, when the patch is a file rather than stdin, replaces
  that file with the result.  Without -i the entries go to stdout.
  """
  try:
    opts, args = getopt.getopt(sys.argv[1:], 'hiv', ['help', 'verbose', 'inline'])
  except getopt.GetoptError:
    # sys.exc_info() fetches the exception without version-specific
    # syntax in the except clause.
    error(str(sys.exc_info()[1]))

  inline = False
  verbose = 0

  for o, a in opts:
    if o in ('-h', '--help'):
      print_help_and_exit()
    elif o in ('-i', '--inline'):
      inline = True
    elif o in ('-v', '--verbose'):
      verbose += 1
    else:
      assert False, "unhandled option"

  if len(args) == 0:
    args = ['-']
  elif len(args) > 1:
    error("too many arguments; for more details run with -h")

  # Looked up only once the command line is known to be valid, so that
  # -h and usage errors do not depend on the git/.mklog configuration.
  name, email = read_user_info()

  if args[0] == '-':
    contents = sys.stdin.read()
  else:
    patch = open(args[0], 'rb')
    try:
      contents = patch.read()
    finally:
      patch.close()

  diffs = parse_patch(contents)

  if verbose:
    sys.stdout.write("Parse results:\n")
    for d in diffs:
      d.dump()

  # Generate template ChangeLog.

  logs = {}
  for d in diffs:
    log_name = d.clname

    logs.setdefault(log_name, '')
    logs[log_name] += '\t* %s' % d.relname

    change_msg = ''

    # Check if file was removed or added.
    # Two patterns for context and unified diff.
    if len(d.hunks) == 1:
      hunk0 = d.hunks[0]
      if hunk0.is_file_addition():
        if re.search(r'testsuite.*(?<!\.exp)$', d.filename):
          change_msg = ': New test.\n'
        else:
          change_msg = ": New file.\n"
      elif hunk0.is_file_removal():
        change_msg = ": Remove.\n"

    # For C/C++-like sources list the touched functions instead.
    _, ext = os.path.splitext(d.filename)
    if not change_msg and ext in ['.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def']:
      fns = []
      for hunk in d.hunks:
        for fn in find_changed_funs(hunk):
          if fn not in fns:
            fns.append(fn)

      for fn in fns:
        # The first name continues the '* file' line, the others get
        # lines of their own.
        if change_msg:
          change_msg += "\t(%s):\n" % fn
        else:
          change_msg = " (%s):\n" % fn

    logs[log_name] += change_msg if change_msg else ":\n"

  if inline and args[0] != '-':
    # Get a temp filename, rather than an open filehandle, because we use
    # the open to truncate.
    fd, tmp = tempfile.mkstemp("tmp.XXXXXXXX")
    os.close(fd)

    # Copy permissions to temp file
    shutil.copymode(args[0], tmp)

    # Open the temp file, clearing contents.
    out = open(tmp, 'wb')
  else:
    tmp = None
    out = sys.stdout

  # Print log
  date = time.strftime('%Y-%m-%d')
  written = False
  try:
    for log_name, msg in sorted(logs.items()):
      out.write("""\
%s:

%s  %s  <%s>

%s\n""" % (log_name, date, name, email, msg))

    if inline:
      # Append patch body
      out.write(contents)
    written = True
  finally:
    if tmp is not None:
      out.close()
      if not written:
        # Do not leave a partial temp file behind when writing failed.
        os.remove(tmp)

  if tmp is not None:
    # Replace the patch with the new contents.
    shutil.move(tmp, args[0])
468
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
  main()
471