1#!/usr/bin/env python3
2
3# Copyright (C) 2017-2019 Free Software Foundation, Inc.
4#
5# This file is part of GCC.
6#
7# GCC is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 3, or (at your option)
10# any later version.
11#
12# GCC is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with GCC; see the file COPYING.  If not, write to
19# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20# Boston, MA 02110-1301, USA.
21
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it (optionally prepending
# the entry to the patch itself).  It does not try to be too smart when
# parsing function names, but it produces a reasonable approximation.
#
# This is a straightforward adaptation of the original Perl script.
28#
29# Author: Yury Gribov <tetra2005@gmail.com>
30
31import argparse
32import sys
33import re
34import os.path
35import os
36import tempfile
37import time
38import shutil
39from subprocess import Popen, PIPE
40
# Program name, used as the prefix of every diagnostic.
me = os.path.basename(sys.argv[0])

# Matches a "PR component/number" reference inside an added ('+') line whose
# text starts with a comment marker: '//', '/*', 'C', 'c', '*' or '!'.
# Group 3 holds the "PR component/number" text.  The pattern is a raw string
# so that the backslashes reach the regex engine without relying on Python's
# handling of unrecognized string escapes.
pr_regex = re.compile(r'\+(\/(\/|\*)|[Cc*!])\s+(PR [a-z+-]+\/[0-9]+)')
44
def error(msg):
  """Print MSG to stderr as a fatal error and terminate with exit status 1."""
  text = "%s: error: %s\n" % (me, msg)
  sys.stderr.write(text)
  raise SystemExit(1)
48
def warn(msg):
  """Print MSG to stderr as a warning; execution continues."""
  text = "%s: warning: %s\n" % (me, msg)
  sys.stderr.write(text)
51
class RegexCache(object):
  """Remember the latest regex result so it can be tested and then queried.

  This imitates the Perl habit of matching inside a condition and reading
  the captured groups afterwards.
  """

  def __init__(self):
    # Result of the most recent match()/search() call, or None.
    self.last_match = None

  def match(self, p, s):
    """Match P (pattern string or compiled regex) at the start of S.

    The result is stored in `last_match` and also returned.
    """
    if isinstance(p, str):
      result = re.match(p, s)
    else:
      result = p.match(s)
    self.last_match = result
    return result

  def search(self, p, s):
    """Search S for P (pattern string or compiled regex).

    The result is stored in `last_match` and also returned.
    """
    if isinstance(p, str):
      result = re.search(p, s)
    else:
      result = p.search(s)
    self.last_match = result
    return result

  def group(self, n):
    """Return group N of the most recent successful match."""
    found = self.last_match
    return found.group(n)

# Module-wide instance shared by the parsing code in this file.
cache = RegexCache()
70
def run(cmd, die_on_error):
  """Run CMD (a command line split on single spaces) and capture its output.

  Returns a tuple (returncode, stdout decoded to str, stderr as bytes).
  When DIE_ON_ERROR is true and the command exits with a non-zero status,
  the failure is reported through error(), including the captured stderr.
  """
  proc = Popen(cmd.split(' '), stderr = PIPE, stdout = PIPE)
  (out, err) = proc.communicate()
  if die_on_error and proc.returncode != 0:
    # Report the text that was captured from stderr, not the pipe object,
    # and give the format string one placeholder per argument.
    error("`%s` failed:\n%s" % (cmd, err.decode(errors='replace')))
  return proc.returncode, out.decode(), err
78
def read_user_info():
  """Return the (name, email) pair used in ChangeLog entry headers.

  When ~/.mklog exists it takes precedence.  The .mklog format is:
    NAME = ...
    EMAIL = ...
  Otherwise the values are taken from `git config user.name` and
  `git config user.email`.  Missing settings are reported through error().
  """

  # First try to read .mklog config
  mklog_conf = os.path.expanduser('~/.mklog')
  if os.path.exists(mklog_conf):
    attrs = {}
    # 'with' guarantees the file is closed even if reading fails.
    with open(mklog_conf) as f:
      for s in f:
        if cache.match(r'^\s*([a-zA-Z0-9_]+)\s*=\s*(.*?)\s*$', s):
          attrs[cache.group(1)] = cache.group(2)
    if 'NAME' not in attrs:
      error("'NAME' not present in .mklog")
    if 'EMAIL' not in attrs:
      error("'EMAIL' not present in .mklog")
    return attrs['NAME'], attrs['EMAIL']

  # Otherwise go with git

  rc1, name, _ = run('git config user.name', False)
  name = name.rstrip()
  rc2, email, _ = run('git config user.email', False)
  email = email.rstrip()

  if rc1 != 0 or rc2 != 0:
    error("""\
Could not read git user.name and user.email settings.
Please add missing git settings, or create a %s.
""" % mklog_conf)

  return name, email
115
def get_parent_changelog (s):
  """See which ChangeLog this file change should go to.

  S is the path of the changed file.  Returns a tuple
  (ChangeLog path, name of S relative to that ChangeLog's directory).
  A bare file name maps to "ChangeLog"; when no ChangeLog is found in any
  leading directory of S the result is ("Unknown ChangeLog", S).
  Candidate ChangeLogs are looked up both under the GCC source root (the
  parent of the directory holding this script) and relative to the
  current directory.
  """

  if s.find('\\') == -1 and s.find('/') == -1:
    return "ChangeLog", s

  gcc_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

  d = s
  while d:
    clname = d + "/ChangeLog"
    if os.path.exists(gcc_root + '/' + clname) or os.path.exists(clname):
      relname = s[len(d)+1:]
      return clname, relname
    parent, _ = os.path.split(d)
    # os.path.split('/') yields ('/', ''), so for absolute paths the walk
    # would never reach the empty string; stop once no progress is made.
    if parent == d:
      break
    d = parent

  return "Unknown ChangeLog", s
133
class FileDiff:
  """All changes made by a patch to one file."""

  def __init__(self, filename):
    # Path of the changed file as extracted from the diff header.
    self.filename = filename
    # Hunks belonging to this file, filled in by the patch parser.
    self.hunks = []
    # ChangeLog that should receive the entry, and the file name as it
    # should be written in that ChangeLog.
    changelog, relative = get_parent_changelog(filename)
    self.clname = changelog
    self.relname = relative

  def dump(self):
    """Print a debugging description of this diff and of each hunk."""
    header = "Diff for %s:\n  ChangeLog = %s\n  rel name = %s\n" % (
        self.filename, self.clname, self.relname)
    print(header)
    for idx, hunk in enumerate(self.hunks):
      print("Next hunk %d:" % idx)
      hunk.dump()
147
class Hunk:
  """Class to represent a single hunk of changes."""

  def __init__(self, hdr):
    # Hunk header line ('@@ ... @@' or a row of asterisks).
    self.hdr = hdr
    # Lines following the header, in patch order.
    self.lines = []
    # True when the header is a context-diff header, False otherwise.
    self.ctx_diff = bool(is_ctx_hunk_start(hdr))

  def dump(self):
    """Print the header and the body of the hunk."""
    print('%s' % self.hdr)
    print('%s' % '\n'.join(self.lines))

  def is_file_addition(self):
    """Does hunk describe addition of file?

    Always returns a bool (previously the result was True, None or a
    match object depending on the path taken).
    """
    if self.ctx_diff:
      # Context diffs mark an empty "old" side with '*** 0 ****'.
      return any(re.match(r'^\*\*\* 0 \*\*\*\*', line)
                 for line in self.lines)
    return bool(re.match(r'^@@ -0,0 \+1.* @@', self.hdr))

  def is_file_removal(self):
    """Does hunk describe removal of file?

    Always returns a bool (previously the result was True, None or a
    match object depending on the path taken).
    """
    if self.ctx_diff:
      # Context diffs mark an empty "new" side with '--- 0 ----'.
      return any(re.match(r'^--- 0 ----', line)
                 for line in self.lines)
    return bool(re.match(r'^@@ -1.* \+0,0 @@', self.hdr))
177
def is_file_diff_start(s):
  """Tell whether S is the first line of a per-file diff header.

  Such a line starts with '*** ' or '--- '.  Context-diff range markers
  like '*** 385,391 ****' also start that way but end with the same
  three characters, which is how they are told apart.
  """
  for lead, tail in (('*** ', '***'), ('--- ', '---')):
    if s.startswith(lead) and not s.endswith(tail):
      return True
  return False
183
def is_ctx_hunk_start(s):
  """Return a match object if S starts with five or more '*', else None."""
  ctx_header = r'^\*\*\*\*\*\**'
  return re.match(ctx_header, s)
186
def is_uni_hunk_start(s):
  """Return a match object if S starts with an '@@ ... @@' header, else None."""
  uni_header = r'^@@ .* @@'
  return re.match(uni_header, s)
189
def is_hunk_start(s):
  """Return a true value if S is a context or unified hunk header."""
  ctx = is_ctx_hunk_start(s)
  if ctx:
    return ctx
  return is_uni_hunk_start(s)
192
def remove_suffixes(s):
  """Normalize a file name taken from a diff header.

  Drops one leading 'a/' or 'b/' component and a trailing '.jj'.
  """
  if s[:2] in ('a/', 'b/'):
    s = s[2:]
  if s[-3:] == '.jj':
    s = s[:len(s) - 3]
  return s
199
def find_changed_funs(hunk):
  """Find all functions touched by hunk.  We don't try too hard
     to find good matches.  This should return a superset
     of the actual set of functions in the .diff file.

     HUNK must provide the attributes `hdr`, `lines` and `ctx_diff`.
     Returns a list of names without duplicates, in order of first
     appearance.
  """

  fns = []
  fn = None

  # The hunk header itself may carry text identifying the enclosing function.
  if (cache.match(r'^\*\*\*\*\*\** ([a-zA-Z0-9_].*)', hunk.hdr)
      or cache.match(r'^@@ .* @@ ([a-zA-Z0-9_].*)', hunk.hdr)):
    fn = cache.group(1)

  for line in hunk.lines:
    # Context diffs have extra whitespace after first char;
    # remove it to make matching easier.
    if hunk.ctx_diff:
      line = re.sub(r'^([-+! ]) ', r'\1', line)

    # Remember most recent identifier in hunk
    # that might be a function name.
    if cache.match(r'^[-+! ]([a-zA-Z0-9_#].*)', line):
      fn = cache.group(1)

    # A changed line starts with '-', '+' or '!' and is not followed by '-'.
    change = line and re.match(r'^[-+!][^-]', line)

    # Top-level comment cannot belong to function
    if re.match(r'^[-+! ]\/\*', line):
      fn = None

    if change and fn:
      if cache.match(r'^((class|struct|union|enum)\s+[a-zA-Z0-9_]+)', fn):
        # Struct declaration
        fn = cache.group(1)
      elif cache.search(r'#\s*define\s+([a-zA-Z0-9_]+)', fn):
        # Macro definition
        fn = cache.group(1)
      elif cache.match(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)', fn):
        # Supermacro
        fn = cache.group(1)
      elif cache.search(r'([a-zA-Z_][^()\s]*)\s*\([^*]', fn):
        # Discard template and function parameters.
        fn = cache.group(1)
        fn = re.sub(r'<[^<>]*>', '', fn)
        fn = fn.rstrip()
      else:
        fn = None

      if fn and fn not in fns:  # Avoid dups
        fns.append(fn)

      # The candidate has been consumed; a later change needs a new one.
      fn = None

  return fns
254
def _extract_filename(left, right):
  """Derive the changed file's name from the two names of a diff header."""
  if left == right:
    return left
  if left == '/dev/null':
    return right
  if right == '/dev/null':
    return left

  # Otherwise keep the longest run of trailing path components that both
  # names share.
  comps = []
  l_rest, r_rest = left, right
  while l_rest and r_rest:
    l_rest, l = os.path.split(l_rest)
    r_rest, r = os.path.split(r_rest)
    if l != r:
      break
    comps.append(l)

  if not comps:
    # Report the names as they appeared, not the partially split leftovers.
    error("failed to extract common name for %s and %s" % (left, right))

  comps.reverse()
  return '/'.join(comps)

def parse_patch(contents):
  """Parse patch contents to a sequence of FileDiffs.

  CONTENTS is the whole patch as one string.  Returns a list with one
  FileDiff per file header found, each holding its Hunk objects; a file
  without any hunk gets an empty hunk list.  Malformed headers are
  reported through error().
  """

  diffs = []

  lines = contents.split('\n')

  i = 0
  while i < len(lines):
    line = lines[i]

    # Diff headers look like
    #   --- a/gcc/tree.c
    #   +++ b/gcc/tree.c
    # or
    #   *** gcc/cfgexpand.c     2013-12-25 20:07:24.800350058 +0400
    #   --- gcc/cfgexpand.c     2013-12-25 20:06:30.612350178 +0400

    if not is_file_diff_start(line):
      i += 1
      continue

    left = remove_suffixes(re.split(r'\s+', line)[1])

    # The second header line must exist and carry the other file name.
    i += 1
    if (i >= len(lines)
        or not cache.match(r'^[+-][+-][+-] +(\S+)', lines[i])):
      error("expected filename in line %d" % (i + 1))
    right = remove_suffixes(cache.group(1))

    d = FileDiff(_extract_filename(left, right))
    diffs.append(d)

    # Collect hunks for current file.
    hunk = None
    i += 1
    while i < len(lines):
      line = lines[i]

      if is_hunk_start(line):
        # Create new hunk when we see hunk header
        if hunk is not None:
          d.hunks.append(hunk)
        hunk = Hunk(line)
      elif (is_file_diff_start(line)
            or line.startswith(('diff ', 'Index: '))):
        # Stop when we reach next diff.  Leave `i` on this line so the
        # outer loop examines it; stepping back would re-scan a line that
        # has already been consumed.
        break
      elif hunk is not None:
        hunk.lines.append(line)
      i += 1

    # Files without any hunk contribute no placeholder entry.
    if hunk is not None:
      d.hunks.append(hunk)

  return diffs
342
343
def get_pr_from_testcase(line):
  """Return the 'PR component/number' text found in LINE, or None."""
  found = pr_regex.search(line)
  return found.group(3) if found is not None else None
350
def main():
  """Command-line entry point: print a ChangeLog template for a patch.

  Reads the patch from the file named on the command line, or from
  standard input when no file (or '-') is given.  With --inline the
  template is followed by the patch itself, and a named patch file is
  rewritten in place; otherwise the result goes to standard output.
  """

  help_message =  """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in Subversion/git).
"""

  inline_message = """\
Prepends ChangeLog to PATCH.
If PATCH is not stdin, modifies PATCH in-place,
otherwise writes to stdout.
"""

  parser = argparse.ArgumentParser(description = help_message)
  parser.add_argument('-v', '--verbose', action = 'store_true', help = 'Verbose messages')
  parser.add_argument('-i', '--inline', action = 'store_true', help = inline_message)
  parser.add_argument('input', nargs = '?', help = 'Patch file (or missing, read standard input)')
  args = parser.parse_args()
  if args.input == '-':
    args.input = None

  # Looked up after argument parsing so that --help and usage errors do not
  # require user information to be configured.
  name, email = read_user_info()

  if args.input:
    with open(args.input) as patch_file:
      contents = patch_file.read()
  else:
    contents = sys.stdin.read()
  diffs = parse_patch(contents)

  if args.verbose:
    print("Parse results:")
    for d in diffs:
      d.dump()

  # Generate template ChangeLog.

  logs = {}
  prs = []
  for d in diffs:
    log_name = d.clname

    logs.setdefault(log_name, '')
    logs[log_name] += '\t* %s' % d.relname

    # Ignore placeholder entries recorded for files without any hunk.
    hunks = [h for h in d.hunks if h is not None]

    change_msg = ''

    # Check if file was removed or added.
    # Two patterns for context and unified diff.
    if len(hunks) == 1:
      hunk0 = hunks[0]
      if hunk0.is_file_addition():
        if re.search(r'testsuite.*(?<!\.exp)$', d.filename):
          change_msg = ': New test.\n'
          # Only the first line of a new test is inspected for a PR marker.
          pr = get_pr_from_testcase(hunk0.lines[0]) if hunk0.lines else None
          if pr and pr not in prs:
            prs.append(pr)
        else:
          change_msg = ": New file.\n"
      elif hunk0.is_file_removal():
        change_msg = ": Remove.\n"

    _, ext = os.path.splitext(d.filename)
    if (not change_msg and ext in ['.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def']
        and not 'testsuite' in d.filename):
      fns = []
      for hunk in hunks:
        for fn in find_changed_funs(hunk):
          if fn not in fns:
            fns.append(fn)

      # The first function shares the line with the file name; the others
      # go on tab-indented lines of their own.
      for fn in fns:
        if change_msg:
          change_msg += "\t(%s):\n" % fn
        else:
          change_msg = " (%s):\n" % fn

    logs[log_name] += change_msg if change_msg else ":\n"

  if args.inline and args.input:
    # Get a temp filename, rather than an open filehandle, because we use
    # the open to truncate.  Prefer the directory of the patch so that the
    # final move is a rename on the same filesystem; fall back to the
    # default temporary directory when that directory is not writable.
    target_dir = os.path.dirname(os.path.abspath(args.input))
    try:
      fd, tmp = tempfile.mkstemp(prefix = 'mklog.', dir = target_dir)
    except OSError:
      fd, tmp = tempfile.mkstemp(prefix = 'mklog.')
    os.close(fd)

    # Copy permissions to temp file
    shutil.copymode(args.input, tmp)

    # Open the temp file, clearing contents.
    out = open(tmp, 'w')
  else:
    tmp = None
    out = sys.stdout

  # Print log
  date = time.strftime('%Y-%m-%d')
  bugmsg = ''
  if prs:
    bugmsg = '\n'.join(['\t' + pr for pr in prs]) + '\n'

  for log_name, msg in sorted(logs.items()):
    out.write("""\
%s:

%s  %s  <%s>

%s%s\n""" % (log_name, date, name, email, bugmsg, msg))

  if args.inline:
    # Append patch body
    out.write(contents)

    if args.input:
      # Replace the original patch with the new contents
      out.close()
      shutil.move(tmp, args.input)

if __name__ == '__main__':
  main()
467