# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible to generate 'index.html' for the report.

The input for this step is the output directory, where individual reports
could be found. It parses those reports and generates 'index.html'. """

import re
import os
import os.path
import sys
import shutil
import plistlib
import glob
import json
import logging
import datetime
from libscanbuild import duplicate_check
from libscanbuild.clang import get_version

__all__ = ['document']


def document(args):
    """ Generates cover report and returns the number of bugs/crashes. """

    html_reports_available = args.output_format in {'html', 'plist-html', 'sarif-html'}
    sarif_reports_available = args.output_format in {'sarif', 'sarif-html'}

    logging.debug('count crashes and bugs')
    crash_count = sum(1 for _ in read_crashes(args.output))
    bug_counter = create_counters()
    for bug in read_bugs(args.output, html_reports_available):
        bug_counter(bug)
    result = crash_count + bug_counter.total

    if html_reports_available and result:
        use_cdb = os.path.exists(args.cdb)

        logging.debug('generate index.html file')
        # common prefix for source files to have shorter path
        prefix = commonprefix_from(args.cdb) if use_cdb else os.getcwd()
        # assemble the cover from multiple fragments
        fragments = []
        try:
            if bug_counter.total:
                fragments.append(bug_summary(args.output, bug_counter))
                fragments.append(bug_report(args.output, prefix))
            if crash_count:
                fragments.append(crash_report(args.output, prefix))
            assemble_cover(args, prefix, fragments)
            # copy additional files to the report
            copy_resource_files(args.output)
            if use_cdb:
                shutil.copy(args.cdb, args.output)
        finally:
            # fragments are temporary files; remove them even on failure
            for fragment in fragments:
                os.remove(fragment)

    if sarif_reports_available:
        logging.debug('merging sarif files')
        merge_sarif_files(args.output)

    return result


def assemble_cover(args, prefix, fragments):
    """ Put together the fragments into a final report. """

    import getpass
    import socket

    if args.html_title is None:
        args.html_title = os.path.basename(prefix) + ' - analyzer results'

    with open(os.path.join(args.output, 'index.html'), 'w') as handle:
        indent = 0
        handle.write(reindent("""
        |<!DOCTYPE html>
        |<html>
        |  <head>
        |    <title>{html_title}</title>
        |    <link type="text/css" rel="stylesheet" href="scanview.css"/>
        |    <script type='text/javascript' src="sorttable.js"></script>
        |    <script type='text/javascript' src='selectable.js'></script>
        |  </head>""", indent).format(html_title=args.html_title))
        handle.write(comment('SUMMARYENDHEAD'))
        handle.write(reindent("""
        |  <body>
        |    <h1>{html_title}</h1>
        |    <table>
        |      <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
        |      <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
        |      <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
        |      <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
        |      <tr><th>Date:</th><td>{date}</td></tr>
        |    </table>""", indent).format(html_title=args.html_title,
                                         user_name=getpass.getuser(),
                                         host_name=socket.gethostname(),
                                         current_dir=prefix,
                                         cmd_args=' '.join(sys.argv),
                                         clang_version=get_version(args.clang),
                                         date=datetime.datetime.today(
                                         ).strftime('%c')))
        for fragment in fragments:
            # copy the content of fragments
            with open(fragment, 'r') as input_handle:
                shutil.copyfileobj(input_handle, handle)
        handle.write(reindent("""
        |  </body>
        |</html>""", indent))


def bug_summary(output_dir, bug_counter):
    """ Bug summary is a HTML table to give a better overview of the bugs. """

    name = os.path.join(output_dir, 'summary.html.fragment')
    with open(name, 'w') as handle:
        indent = 4
        handle.write(reindent("""
        |<h2>Bug Summary</h2>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Bug Type</td>
        |      <td>Quantity</td>
        |      <td class="sorttable_nosort">Display?</td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent))
        handle.write(reindent("""
        |    <tr style="font-weight:bold">
        |      <td class="SUMM_DESC">All Bugs</td>
        |      <td class="Q">{0}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox" id="AllBugsCheck"
        |                 onClick="CopyCheckedStateToCheckButtons(this);"/>
        |        </center>
        |      </td>
        |    </tr>""", indent).format(bug_counter.total))
        for category, types in bug_counter.categories.items():
            handle.write(reindent("""
            |    <tr>
            |      <th>{0}</th><th colspan=2></th>
            |    </tr>""", indent).format(category))
            for bug_type in types.values():
                handle.write(reindent("""
                |    <tr>
                |      <td class="SUMM_DESC">{bug_type}</td>
                |      <td class="Q">{bug_count}</td>
                |      <td>
                |        <center>
                |          <input checked type="checkbox"
                |                 onClick="ToggleDisplay(this,'{bug_type_class}');"/>
                |        </center>
                |      </td>
                |    </tr>""", indent).format(**bug_type))
        handle.write(reindent("""
        |  </tbody>
        |</table>""", indent))
        handle.write(comment('SUMMARYBUGEND'))
    return name


def bug_report(output_dir, prefix):
    """ Creates a fragment from the analyzer reports. """

    pretty = prettify_bug(prefix, output_dir)
    bugs = (pretty(bug) for bug in read_bugs(output_dir, True))

    name = os.path.join(output_dir, 'bugs.html.fragment')
    with open(name, 'w') as handle:
        indent = 4
        handle.write(reindent("""
        |<h2>Reports</h2>
        |<table class="sortable" style="table-layout:automatic">
        |  <thead>
        |    <tr>
        |      <td>Bug Group</td>
        |      <td class="sorttable_sorted">
        |        Bug Type
        |        <span id="sorttable_sortfwdind">&nbsp;&#x25BE;</span>
        |      </td>
        |      <td>File</td>
        |      <td>Function/Method</td>
        |      <td class="Q">Line</td>
        |      <td class="Q">Path Length</td>
        |      <td class="sorttable_nosort"></td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent))
        handle.write(comment('REPORTBUGCOL'))
        for current in bugs:
            handle.write(reindent("""
            |    <tr class="{bug_type_class}">
            |      <td class="DESC">{bug_category}</td>
            |      <td class="DESC">{bug_type}</td>
            |      <td>{bug_file}</td>
            |      <td class="DESC">{bug_function}</td>
            |      <td class="Q">{bug_line}</td>
            |      <td class="Q">{bug_path_length}</td>
            |      <td><a href="{report_file}#EndPath">View Report</a></td>
            |    </tr>""", indent).format(**current))
            handle.write(comment('REPORTBUG', {'id': current['report_file']}))
        handle.write(reindent("""
        |  </tbody>
        |</table>""", indent))
        handle.write(comment('REPORTBUGEND'))
    return name


def crash_report(output_dir, prefix):
    """ Creates a fragment from the compiler crashes. """

    pretty = prettify_crash(prefix, output_dir)
    crashes = (pretty(crash) for crash in read_crashes(output_dir))

    name = os.path.join(output_dir, 'crashes.html.fragment')
    with open(name, 'w') as handle:
        indent = 4
        handle.write(reindent("""
        |<h2>Analyzer Failures</h2>
        |<p>The analyzer had problems processing the following files:</p>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Problem</td>
        |      <td>Source File</td>
        |      <td>Preprocessed File</td>
        |      <td>STDERR Output</td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent))
        for current in crashes:
            handle.write(reindent("""
            |    <tr>
            |      <td>{problem}</td>
            |      <td>{source}</td>
            |      <td><a href="{file}">preprocessor output</a></td>
            |      <td><a href="{stderr}">analyzer std err</a></td>
            |    </tr>""", indent).format(**current))
            handle.write(comment('REPORTPROBLEM', current))
        handle.write(reindent("""
        |  </tbody>
        |</table>""", indent))
        handle.write(comment('REPORTCRASHES'))
    return name


def read_crashes(output_dir):
    """ Generate a unique sequence of crashes from given output directory. """

    return (parse_crash(filename)
            for filename in glob.iglob(os.path.join(output_dir, 'failures',
                                                    '*.info.txt')))


def read_bugs(output_dir, html):
    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
    """ Generate a unique sequence of bugs from given output directory.

    Duplicates can be in a project if the same module was compiled multiple
    times with different compiler options. These would be better to show in
    the final report (cover) only once. """

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    duplicate = duplicate_check(
        lambda bug: '{bug_line}.{bug_path_length}:{bug_file}'.format(**bug))

    # get the right parser for the job.
    parser = parse_bug_html if html else parse_bug_plist
    # get the input files, which are not empty.
    pattern = os.path.join(output_dir, '*.html' if html else '*.plist')
    bug_files = (file for file in glob.iglob(pattern) if not empty(file))

    for bug_file in bug_files:
        for bug in parser(bug_file):
            if not duplicate(bug):
                yield bug


def merge_sarif_files(output_dir, sort_files=False):
    """ Reads and merges all .sarif files in the given output directory.

    Each sarif file in the output directory is understood as a single run
    and thus appear separate in the top level runs array. This requires
    modifying the run index of any embedded links in messages.
    """

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    def update_sarif_object(sarif_object, runs_count_offset):
        """
        Given a SARIF object, checks its dictionary entries for a 'message' property.
        If it exists, updates the message index of embedded links in the run index.

        Recursively looks through entries in the dictionary.
        """
        if not isinstance(sarif_object, dict):
            return sarif_object

        if 'message' in sarif_object:
            sarif_object['message'] = match_and_update_run(sarif_object['message'], runs_count_offset)

        for key in sarif_object:
            if isinstance(sarif_object[key], list):
                # iterate through subobjects and update it.
                arr = [update_sarif_object(entry, runs_count_offset) for entry in sarif_object[key]]
                sarif_object[key] = arr
            elif isinstance(sarif_object[key], dict):
                sarif_object[key] = update_sarif_object(sarif_object[key], runs_count_offset)
            else:
                # do nothing
                pass

        return sarif_object

    def match_and_update_run(message, runs_count_offset):
        """
        Given a SARIF message object, checks if the text property contains an embedded link and
        updates the run index if necessary.
        """
        if 'text' not in message:
            return message

        # we only merge runs, so we only need to update the run index
        pattern = re.compile(r'sarif:/runs/(\d+)')

        text = message['text']
        matches = re.finditer(pattern, text)
        matches_list = list(matches)

        # update matches from right to left to make increasing character length (9->10) smoother
        for idx in range(len(matches_list) - 1, -1, -1):
            match = matches_list[idx]
            new_run_count = str(runs_count_offset + int(match.group(1)))
            text = text[0:match.start(1)] + new_run_count + text[match.end(1):]

        message['text'] = text
        return message

    sarif_files = (file for file in glob.iglob(os.path.join(output_dir, '*.sarif')) if not empty(file))
    # exposed for testing since the order of files returned by glob is not guaranteed to be sorted
    if sort_files:
        sarif_files = list(sarif_files)
        sarif_files.sort()

    runs_count = 0
    merged = {}
    for sarif_file in sarif_files:
        with open(sarif_file) as fp:
            sarif = json.load(fp)
        if 'runs' not in sarif:
            continue

        # start with the first file
        if not merged:
            merged = sarif
        else:
            # extract the run and append it to the merged output
            for run in sarif['runs']:
                new_run = update_sarif_object(run, runs_count)
                merged['runs'].append(new_run)

        runs_count += len(sarif['runs'])

    with open(os.path.join(output_dir, 'results-merged.sarif'), 'w') as out:
        json.dump(merged, out, indent=4, sort_keys=True)


def parse_bug_plist(filename):
    """ Returns the generator of bugs from a single .plist file. """

    # plistlib.readPlist was removed in Python 3.9; plistlib.load is the
    # supported replacement and accepts a binary file object.
    with open(filename, 'rb') as handle:
        content = plistlib.load(handle)
    # default to [] so a plist without a 'files' key is reported as a
    # parse failure below instead of raising TypeError on len(None)
    files = content.get('files', [])
    for bug in content.get('diagnostics', []):
        if len(files) <= int(bug['location']['file']):
            logging.warning('Parsing bug from "%s" failed', filename)
            continue

        yield {
            'result': filename,
            'bug_type': bug['type'],
            'bug_category': bug['category'],
            'bug_line': int(bug['location']['line']),
            'bug_path_length': int(bug['location']['col']),
            'bug_file': files[int(bug['location']['file'])]
        }


def parse_bug_html(filename):
    """ Parse out the bug information from HTML output. """

    patterns = [re.compile(r'<!-- BUGTYPE (?P<bug_type>.*) -->$'),
                re.compile(r'<!-- BUGFILE (?P<bug_file>.*) -->$'),
                re.compile(r'<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$'),
                re.compile(r'<!-- BUGLINE (?P<bug_line>.*) -->$'),
                re.compile(r'<!-- BUGCATEGORY (?P<bug_category>.*) -->$'),
                re.compile(r'<!-- BUGDESC (?P<bug_description>.*) -->$'),
                re.compile(r'<!-- FUNCTIONNAME (?P<bug_function>.*) -->$')]
    endsign = re.compile(r'<!-- BUGMETAEND -->')

    bug = {
        'report_file': filename,
        'bug_function': 'n/a',  # compatibility with < clang-3.5
        'bug_category': 'Other',
        'bug_line': 0,
        'bug_path_length': 1
    }

    with open(filename) as handler:
        for line in handler.readlines():
            # do not read the file further
            if endsign.match(line):
                break
            # search for the right lines
            for regex in patterns:
                match = regex.match(line.strip())
                if match:
                    bug.update(match.groupdict())
                    break

    encode_value(bug, 'bug_line', int)
    encode_value(bug, 'bug_path_length', int)

    yield bug


def parse_crash(filename):
    """ Parse out the crash information from the report file. """

    match = re.match(r'(.*)\.info\.txt', filename)
    name = match.group(1) if match else None
    with open(filename, mode='rb') as handler:
        # this is a workaround to fix windows read '\r\n' as new lines.
        lines = [line.decode().rstrip() for line in handler.readlines()]
        return {
            'source': lines[0],
            'problem': lines[1],
            'file': name,
            'info': name + '.info.txt',
            'stderr': name + '.stderr.txt'
        }


def category_type_name(bug):
    """ Create a new bug attribute from bug by category and type.

    The result will be used as CSS class selector in the final report. """

    def smash(key):
        """ Make value ready to be HTML attribute value. """

        return bug.get(key, '').lower().replace(' ', '_').replace("'", '')

    return escape('bt_' + smash('bug_category') + '_' + smash('bug_type'))


def create_counters():
    """ Create counters for bug statistics.

    Two entries are maintained: 'total' is an integer, represents the
    number of bugs. The 'categories' is a two level categorisation of bug
    counters. The first level is 'bug category' the second is 'bug type'.
    Each entry in this classification is a dictionary of 'count', 'type'
    and 'label'. """

    def predicate(bug):
        bug_category = bug['bug_category']
        bug_type = bug['bug_type']
        current_category = predicate.categories.get(bug_category, dict())
        current_type = current_category.get(bug_type, {
            'bug_type': bug_type,
            'bug_type_class': category_type_name(bug),
            'bug_count': 0
        })
        current_type.update({'bug_count': current_type['bug_count'] + 1})
        current_category.update({bug_type: current_type})
        predicate.categories.update({bug_category: current_category})
        predicate.total += 1

    predicate.total = 0
    predicate.categories = dict()
    return predicate


def prettify_bug(prefix, output_dir):
    def predicate(bug):
        """ Make safe this values to embed into HTML. """

        bug['bug_type_class'] = category_type_name(bug)

        encode_value(bug, 'bug_file', lambda x: escape(chop(prefix, x)))
        encode_value(bug, 'bug_category', escape)
        encode_value(bug, 'bug_type', escape)
        encode_value(bug, 'report_file', lambda x: escape(chop(output_dir, x)))
        return bug

    return predicate


def prettify_crash(prefix, output_dir):
    def predicate(crash):
        """ Make safe this values to embed into HTML. """

        encode_value(crash, 'source', lambda x: escape(chop(prefix, x)))
        encode_value(crash, 'problem', escape)
        encode_value(crash, 'file', lambda x: escape(chop(output_dir, x)))
        encode_value(crash, 'info', lambda x: escape(chop(output_dir, x)))
        encode_value(crash, 'stderr', lambda x: escape(chop(output_dir, x)))
        return crash

    return predicate


def copy_resource_files(output_dir):
    """ Copy the javascript and css files to the report directory. """

    this_dir = os.path.dirname(os.path.realpath(__file__))
    for resource in os.listdir(os.path.join(this_dir, 'resources')):
        shutil.copy(os.path.join(this_dir, 'resources', resource), output_dir)


def encode_value(container, key, encode):
    """ Run 'encode' on 'container[key]' value and update it. """

    if key in container:
        value = encode(container[key])
        container.update({key: value})


def chop(prefix, filename):
    """ Create 'filename' from '/prefix/filename' """

    return filename if not len(prefix) else os.path.relpath(filename, prefix)


def escape(text):
    """ Paranoid HTML escape method. (Python version independent) """

    # map each HTML metacharacter to its entity; any identity mapping here
    # would silently disable escaping and allow markup injection in reports
    escape_table = {
        '&': '&amp;',
        '"': '&quot;',
        "'": '&apos;',
        '>': '&gt;',
        '<': '&lt;'
    }
    return ''.join(escape_table.get(c, c) for c in text)


def reindent(text, indent):
    """ Utility function to format html output and keep indentation. """

    result = ''
    for line in text.splitlines():
        if len(line.strip()):
            result += ' ' * indent + line.split('|')[1] + os.linesep
    return result


def comment(name, opts=None):
    """ Utility function to format meta information as comment. """

    # use None as sentinel instead of a mutable default argument
    attributes = ''
    for key, value in (opts or {}).items():
        attributes += ' {0}="{1}"'.format(key, value)

    return '<!-- {0}{1} -->{2}'.format(name, attributes, os.linesep)


def commonprefix_from(filename):
    """ Create file prefix from a compilation database entries. """

    with open(filename, 'r') as handle:
        return commonprefix(item['file'] for item in json.load(handle))


def commonprefix(files):
    """ Fixed version of os.path.commonprefix.

    :param files: list of file names.
    :return: the longest path prefix that is a prefix of all files. """
    result = None
    for current in files:
        if result is not None:
            result = os.path.commonprefix([result, current])
        else:
            result = current

    if result is None:
        return ''
    elif not os.path.isdir(result):
        return os.path.dirname(result)
    else:
        return os.path.abspath(result)