1#!/usr/bin/env python3
2
3# Copyright (C) 2020 Free Software Foundation, Inc.
4#
5# This file is part of GCC.
6#
7# GCC is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 3, or (at your option)
10# any later version.
11#
12# GCC is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with GCC; see the file COPYING.  If not, write to
19# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20# Boston, MA 02110-1301, USA.
21
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it.  It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
26#
27# Author: Martin Liska <mliska@suse.cz>
28
29import argparse
30import datetime
31import os
32import re
33import subprocess
34import sys
35from itertools import takewhile
36
37import requests
38
39from unidiff import PatchSet
40
# Matches a 'PR component/number' marker preceded by a comment leader
# ('//', '/*', 'C', 'c', '*' or '!') and whitespace; group 'pr' is the marker.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits a 'PR component/number' string into groups 'comp' and 'num'.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# Matches a 'DR number' marker preceded by the same comment leaders;
# group 'dr' is the marker.
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# Matches the start of a '{ dg-error' or '{ dg-warning' directive.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# Matches 'pr' (any case), at the start or after a non-alphanumeric
# character, followed by at least four digits; group 'pr' is the digits.
# Applied to file names.
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# Captures a whole line that starts with a letter, digit, '_' or '#'.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# Matches a line that starts with '/*'.
comment_regex = re.compile(r'^\/\*')
# Matches a class/struct/union/enum declaration with an optional GTY(...)
# marker; group 1 is the keyword and group 3 is the declared name.
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# Matches '#define NAME' or '#undef NAME'; group 2 is NAME.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# Matches 'DEF...(NAME' at the start of a line; group 1 is NAME.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# Captures a token followed by '(' whose next character is not '*'.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# Matches one '<...>' group that contains no nested angle brackets.
template_and_param_regex = re.compile(r'<[^<>]*>')
# Matches '(define... "NAME"'; group 1 is the quoted NAME.  Used for
# definitions in .md files.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Bugzilla REST query; '%s' is replaced with the bug id and only the
# summary and component fields are requested.
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
               'include_fields=summary,component'

# File extensions for which function names are extracted from hunks.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

# Description shown by the command-line argument parser.
help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NB: despite its name, script_folder is the resolved path of this script
# file itself; root is therefore the parent of the directory holding the
# script.  root is the directory under which ChangeLog files are searched
# and may be overridden by the --directory option.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))

# First PR collected by generate_changelog(); read by the __main__ block to
# build a commit subject line.
firstpr = ''
73
74
def find_changelog(path):
    """Return the closest ancestor directory of PATH that has a ChangeLog.

    Starting from the directory containing PATH, walk upwards and return
    the first directory (spelled as it appears in PATH) for which
    '<root>/<directory>/ChangeLog' exists, where root is the module-level
    search root.  Return '' when no ancestor contains a ChangeLog.
    """
    folder = os.path.split(path)[0]
    while True:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        # dirname() is a fixed point both for '' (relative paths) and for
        # a filesystem root such as '/' (absolute paths, e.g. /dev/null).
        # Stopping on either one avoids looping forever on absolute paths.
        if parent == folder:
            return ''
        folder = parent
84
85
def extract_function_name(line):
    """Return the name of the entity declared on LINE, or None.

    Lines starting a comment yield None.  Otherwise the line is tried, in
    order, as a struct-like declaration, a macro definition, a DEF*
    supermacro invocation and finally a function; the first pattern that
    matches decides the result.
    """
    if comment_regex.match(line):
        return None

    def function_name(match):
        # Discard template and function parameters.
        return template_and_param_regex.sub('', match.group(1)).rstrip()

    extractors = (
        # Struct declaration
        (struct_regex, lambda match: match.group(1) + ' ' + match.group(3)),
        # Macro definition
        (macro_regex, lambda match: match.group(2)),
        # Supermacro
        (super_macro_regex, lambda match: match.group(1)),
        # Function
        (fn_regex, function_name),
    )
    for pattern, extract in extractors:
        found = pattern.search(line)
        if found:
            return extract(found)
    return None
108
109
def try_add_function(functions, line):
    """Add the name extracted from LINE to FUNCTIONS unless already present.

    Return True when a name could be extracted from LINE (whether or not
    it was already in FUNCTIONS), False otherwise.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True
115
116
def sort_changelog_files(changed_file):
    """Return the sort key for CHANGED_FILE: (is added, is removed).

    Used so that files that are neither added nor removed sort first.
    """
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return (added, removed)
119
120
def get_pr_titles(prs):
    """Download the titles of PRS from Bugzilla and return them as text.

    For every entry the part after the last '/' is used as the bug id.
    When Bugzilla reports exactly one bug for it, the entry in PRS is
    rewritten in place to 'PR component/id' and a 'PR component/id -
    summary' line is emitted.  Entries Bugzilla does not know (or rejects
    with an error reply, e.g. 'DR nnn') are left untouched.

    Returns the collected title lines, each followed by an empty line, or
    '' when no title could be retrieved.  Network errors raised by
    requests propagate to the caller.
    """
    output = []
    for idx, pr in enumerate(prs):
        pr_id = pr.split('/')[-1]
        # Without a timeout an unresponsive server would block forever.
        r = requests.get(bugzilla_url % pr_id, timeout=60)
        # An error reply carries no 'bugs' key; treat it as "not found".
        bugs = r.json().get('bugs')
        if bugs and len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        output.append('')
    return '\n'.join(output)
135
136
def _collect_test_prs(file, prs):
    """Append the PR/DR markers of a newly added test FILE to PRS."""
    this_file_prs = []
    hunks = list(file)
    # An added file may have no hunk at all (e.g. an empty file).
    if hunks:
        # Only search the first ten lines, as later lines may contain
        # commented-out code with a note that it has not been tested
        # due to a certain PR or DR.
        for line in hunks[0][0:10]:
            m = pr_regex.search(line.value)
            if m:
                pr = m.group('pr')
                # Remember the number even when the PR is already listed,
                # so the file-name check below does not add it again.
                this_file_prs.append(pr.split('/')[-1])
                if pr not in prs:
                    prs.append(pr)
                continue
            m = dr_regex.search(line.value)
            if m:
                dr = m.group('dr')
                if dr not in prs:
                    prs.append(dr)
            elif dg_regex.search(line.value):
                # Found dg-warning/dg-error line
                break
    # PR number in the file name
    m = pr_filename_regex.search(os.path.basename(file.path))
    if m:
        pr = m.group('pr')
        pr2 = 'PR ' + pr
        if pr not in this_file_prs and pr2 not in prs:
            prs.append(pr2)


def _collect_hunk_functions(hunk, extension, functions):
    """Append the names of the functions touched by HUNK to FUNCTIONS."""
    last_fn = None
    modified_visited = False
    success = False
    for line in hunk:
        m = identifier_regex.match(line.value)
        if line.is_added or line.is_removed:
            # special-case definition in .md files
            m2 = md_def_regex.match(line.value)
            if extension == '.md' and m2:
                fn = m2.group(1)
                if fn not in functions:
                    functions.append(fn)
                    last_fn = None
                    success = True

            if not line.value.strip():
                continue
            modified_visited = True
            if m and try_add_function(functions, m.group(1)):
                last_fn = None
                success = True
        elif line.is_context:
            if last_fn and modified_visited:
                try_add_function(functions, last_fn)
                last_fn = None
                modified_visited = False
                success = True
            elif m:
                last_fn = m.group(1)
                modified_visited = False
    # Fall back to the function named in the hunk header.
    if not success:
        try_add_function(functions, hunk.section_header)


def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a ChangeLog skeleton for the patch DATA.

    DATA is handed to unidiff's PatchSet.  When NO_FUNCTIONS is true, no
    function names are listed.  When FILL_PR_TITLES is true, the titles
    of the mentioned PRs are downloaded and put at the top of the result.
    ADDITIONAL_PRS is an optional list of PR strings listed before the
    ones found in the patch.

    As a side effect the module-level 'firstpr' is set to the first PR
    entry, if there is one.
    """
    global firstpr

    changelogs = {}
    changelog_list = []
    prs = []
    out = ''
    diff = PatchSet(data)

    for pr in additional_prs or []:
        if pr not in prs:
            prs.append(pr)

    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            _collect_test_prs(file, prs)

    if prs:
        firstpr = prs[0]

    if fill_pr_titles:
        # NB: this also rewrites the entries of prs in place.
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = file.path[len(changelog):].lstrip('/')
            if file.is_added_file:
                msg = 'New test' if in_tests else 'New file'
                out += '\t* %s: %s.\n' % (relative_path, msg)
            elif file.is_removed_file:
                out += '\t* %s: Removed.\n' % (relative_path)
            elif getattr(file, 'is_rename', False):
                out += '\t* %s: Moved to...\n' % (relative_path)
                # Drop the leading 'b/' of the target name.
                new_path = file.target_file[2:]
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                if new_path.startswith(changelog):
                    new_path = new_path[len(changelog):].lstrip('/')
                out += '\t* %s: ...here.\n' % (new_path)
            elif os.path.basename(file.path) in generated_files:
                out += '\t* %s: Regenerate.\n' % (relative_path)
            else:
                functions = []
                extension = os.path.splitext(relative_path)[1]
                # Do not add function names for testsuite files
                if (not no_functions and not in_tests
                        and extension in function_extensions):
                    for hunk in file:
                        _collect_hunk_functions(hunk, extension, functions)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out
278
279
def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to the affected ChangeLogs.

    DATA is handed to unidiff's PatchSet.  For every file of the patch the
    closest ChangeLog below the module-level search root is looked up, and
    each such ChangeLog gets one new entry at its top.  The entry is dated
    today and signed with the user name and e-mail reported by
    'git config'.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    changelogs = set()
    diff = PatchSet(data)

    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        # find_changelog() probes below root, so open the file there too
        # instead of relative to the current working directory.
        changelog = os.path.join(root, find_changelog(file.path), 'ChangeLog')
        if changelog in changelogs:
            continue
        changelogs.add(changelog)
        with open(changelog) as f:
            content = f.read()
        with open(changelog, 'w') as f:
            f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
            f.write('\tUpdate copyright years.\n\n')
            f.write(content)
300
301
# Command-line entry point: parse the options, read the patch and either
# update the ChangeLog files (--update-copyright), append the generated
# skeleton to a commit message file (--changelog) or print it.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                             'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    output = None
    data = open(args.input) if args.input else sys.stdin
    try:
        if args.update_copyright:
            update_copyright(data)
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Close the patch file, but never standard input.
        if data is not sys.stdin:
            data.close()

    if output is None:
        # --update-copyright: nothing to print.
        pass
    elif args.changelog:
        with open(args.changelog) as f:
            lines = f.read().split('\n')
        # Everything before the first '#' line is the existing message;
        # the generated text is inserted between it and the rest.
        start = list(takewhile(lambda line: not line.startswith('#'), lines))
        end = lines[len(start):]
        if not start or not start[0]:
            # initial commit subject line 'component: [PRnnnnn]'
            m = prnum_regex.match(firstpr)
            if m:
                title = f'{m.group("comp")}: [PR{m.group("num")}]'
                start.insert(0, title)
        if start:
            # append empty line
            if start[-1] != '':
                start.append('')
        else:
            # append 2 empty lines
            start = 2 * ['']
        # Only open for writing (which truncates the file) once the new
        # content has been computed.
        with open(args.changelog, 'w') as f:
            f.write('\n'.join(start))
            f.write('\n')
            f.write(output)
            f.write('\n'.join(end))
    else:
        print(output, end='')
357