#!/usr/bin/env python3

# Copyright (C) 2020 Free Software Foundation, Inc.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING.  If not, write to
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.

# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog for it.  It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
#
# Author: Martin Liska <mliska@suse.cz>

import argparse
import datetime
import os
import re
import subprocess
import sys
from itertools import takewhile

import requests

from unidiff import PatchSet

pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
dg_regex = re.compile(r'{\s+dg-(error|warning)')
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
comment_regex = re.compile(r'^\/\*')
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
template_and_param_regex = re.compile(r'<[^<>]*>')
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
               'include_fields=summary,component'
# Seconds to wait for Bugzilla before giving up; without a timeout a
# stalled connection would block the script forever.
bugzilla_timeout = 30

function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# Resolved path of this script; the default root is two directory
# levels above it.  The root can be overridden with --directory.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))

# First PR found by the last generate_changelog call ('' if none).
firstpr = ''


def find_changelog(path):
    """Return the directory whose ChangeLog covers the file PATH.

    Walk from the directory of PATH towards the top and return the first
    directory (spelled as it appears in PATH) for which
    ROOT/<directory>/ChangeLog exists.  Return '' when no such directory
    is found, i.e. the top-level ChangeLog is assumed.
    """
    folder = os.path.dirname(path)
    while True:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        # dirname() stops changing at '' for relative paths and at '/'
        # for absolute ones; checking both guarantees termination.
        if parent in ('', folder):
            return ''
        folder = parent


def extract_function_name(line):
    """Return the name of the entity declared on LINE, or None.

    The checks are tried in this order: comment start (ignored),
    class/struct/union/enum declaration, #define/#undef, DEF* supermacro
    and finally anything that looks like a function call or definition.
    """
    if comment_regex.match(line):
        return None
    m = struct_regex.search(line)
    if m:
        # Struct declaration
        return m.group(1) + ' ' + m.group(3)
    m = macro_regex.search(line)
    if m:
        # Macro definition
        return m.group(2)
    m = super_macro_regex.search(line)
    if m:
        # Supermacro
        return m.group(1)
    m = fn_regex.search(line)
    if m:
        # Discard template and function parameters.
        fn = template_and_param_regex.sub('', m.group(1))
        return fn.rstrip()
    return None


def try_add_function(functions, line):
    """Append the name extracted from LINE to FUNCTIONS if it is new.

    Return True when a name could be extracted from LINE, even if it was
    already present in FUNCTIONS.
    """
    fn = extract_function_name(line)
    if fn and fn not in functions:
        functions.append(fn)
    return bool(fn)


def sort_changelog_files(changed_file):
    """Sort key placing modified files first, then removed, then added."""
    return (changed_file.is_added_file, changed_file.is_removed_file)


def get_pr_titles(prs):
    """Query Bugzilla for the summaries of PRS.

    Every entry for which Bugzilla returns exactly one bug is rewritten
    in place to 'PR <component>/<id>'.  Return the text block with one
    'PR ... - summary' paragraph per distinct bug, or '' if none was
    found.
    """
    output = []
    for idx, pr in enumerate(prs):
        pr_id = pr.split('/')[-1]
        r = requests.get(bugzilla_url % pr_id, timeout=bugzilla_timeout)
        bugs = r.json()['bugs']
        if len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        output.append('')
    return '\n'.join(output)


def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a ChangeLog skeleton for the patch DATA.

    DATA is anything accepted by unidiff's PatchSet (e.g. an open file).
    NO_FUNCTIONS disables the extraction of modified function names.
    FILL_PR_TITLES downloads the titles of the mentioned PRs from
    Bugzilla.  ADDITIONAL_PRS is an optional list of PR entries that are
    listed before the ones discovered in the patch.

    As a side effect the global FIRSTPR is set to the first PR entry
    when at least one PR is known.
    """
    changelogs = {}
    changelog_list = []
    prs = []
    out = ''
    diff = PatchSet(data)
    global firstpr

    if additional_prs:
        prs = [pr for pr in additional_prs if pr not in prs]
    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            # Only search the first ten lines as later lines may
            # contain commented-out code with a note that it
            # has not been tested due to a certain PR or DR.
            this_file_prs = []
            hunks = list(file)
            # An added file may have no hunk at all (e.g. an empty file).
            first_lines = hunks[0][0:10] if hunks else []
            for line in first_lines:
                m = pr_regex.search(line.value)
                if m:
                    pr = m.group('pr')
                    if pr not in prs:
                        prs.append(pr)
                        this_file_prs.append(pr.split('/')[-1])
                else:
                    m = dr_regex.search(line.value)
                    if m:
                        dr = m.group('dr')
                        if dr not in prs:
                            prs.append(dr)
                            this_file_prs.append(dr.split('/')[-1])
                    elif dg_regex.search(line.value):
                        # Found dg-warning/dg-error line
                        break
            # PR number in the file name
            fname = os.path.basename(file.path)
            m = pr_filename_regex.search(fname)
            if m:
                pr = m.group('pr')
                pr2 = 'PR ' + pr
                if pr not in this_file_prs and pr2 not in prs:
                    prs.append(pr2)

    if prs:
        firstpr = prs[0]

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = file.path[len(changelog):].lstrip('/')
            functions = []
            if file.is_added_file:
                msg = 'New test' if in_tests else 'New file'
                out += '\t* %s: %s.\n' % (relative_path, msg)
            elif file.is_removed_file:
                out += '\t* %s: Removed.\n' % (relative_path)
            elif hasattr(file, 'is_rename') and file.is_rename:
                out += '\t* %s: Moved to...\n' % (relative_path)
                # Drop the two-character prefix of the target path.
                new_path = file.target_file[2:]
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                if new_path.startswith(changelog):
                    new_path = new_path[len(changelog):].lstrip('/')
                out += '\t* %s: ...here.\n' % (new_path)
            elif os.path.basename(file.path) in generated_files:
                out += '\t* %s: Regenerate.\n' % (relative_path)
            else:
                if not no_functions:
                    for hunk in file:
                        # Do not add function names for testsuite files
                        extension = os.path.splitext(relative_path)[1]
                        if not in_tests and extension in function_extensions:
                            # last_fn is the most recent candidate seen
                            # on a context line; it is recorded once a
                            # modification followed by context is seen.
                            last_fn = None
                            modified_visited = False
                            success = False
                            for line in hunk:
                                m = identifier_regex.match(line.value)
                                if line.is_added or line.is_removed:
                                    # special-case definition in .md files
                                    m2 = md_def_regex.match(line.value)
                                    if extension == '.md' and m2:
                                        fn = m2.group(1)
                                        if fn not in functions:
                                            functions.append(fn)
                                            last_fn = None
                                            success = True

                                    if not line.value.strip():
                                        continue
                                    modified_visited = True
                                    if m and try_add_function(functions,
                                                              m.group(1)):
                                        last_fn = None
                                        success = True
                                elif line.is_context:
                                    if last_fn and modified_visited:
                                        try_add_function(functions, last_fn)
                                        last_fn = None
                                        modified_visited = False
                                        success = True
                                    elif m:
                                        last_fn = m.group(1)
                                        modified_visited = False
                            if not success:
                                # Fall back to the function name diff put
                                # into the hunk header.
                                try_add_function(functions,
                                                 hunk.section_header)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out


def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to affected ChangeLogs.

    DATA is the patch; every ChangeLog file covering one of the files in
    it gets exactly one new entry at its top, dated today and attributed
    to the user.name and user.email reported by 'git config'.  The
    ChangeLog paths are opened as returned by find_changelog, i.e.
    relative to the current working directory.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    changelogs = set()
    diff = PatchSet(data)

    for file in diff:
        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
        if changelog not in changelogs:
            changelogs.add(changelog)
            with open(changelog) as f:
                content = f.read()
            with open(changelog, 'w') as f:
                # ChangeLog header format: two spaces separate the date,
                # the name and the e-mail address.
                f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
                f.write('\tUpdate copyright years.\n\n')
                f.write(content)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                        'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    data = open(args.input) if args.input else sys.stdin
    try:
        if args.update_copyright:
            update_copyright(data)
            output = None
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Close the patch file, but never standard input.
        if data is not sys.stdin:
            data.close()

    if output is not None:
        if args.changelog:
            with open(args.changelog) as f:
                lines = f.read().split('\n')
            # Everything before the first '#' line is the message written
            # by the user; the rest is kept after the generated skeleton.
            start = list(takewhile(lambda line: not line.startswith('#'),
                                   lines))
            end = lines[len(start):]
            with open(args.changelog, 'w') as f:
                if not start or not start[0]:
                    # initial commit subject line 'component: [PRnnnnn]'
                    m = prnum_regex.match(firstpr)
                    if m:
                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
                        start.insert(0, title)
                if start:
                    # append empty line
                    if start[-1] != '':
                        start.append('')
                else:
                    # append 2 empty lines
                    start = 2 * ['']
                f.write('\n'.join(start))
                f.write('\n')
                f.write(output)
                f.write('\n'.join(end))
        else:
            print(output, end='')