# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5""" This module is responsible to generate 'index.html' for the report.
6
7The input for this step is the output directory, where individual reports
8could be found. It parses those reports and generates 'index.html'. """
9
10import re
11import os
12import os.path
13import sys
14import shutil
15import plistlib
16import glob
17import json
18import logging
19import datetime
20from libscanbuild import duplicate_check
21from libscanbuild.clang import get_version
22
23__all__ = ['document']
24
25
26def document(args):
27    """ Generates cover report and returns the number of bugs/crashes. """
28
29    html_reports_available = args.output_format in {'html', 'plist-html', 'sarif-html'}
30    sarif_reports_available = args.output_format in {'sarif', 'sarif-html'}
31
32    logging.debug('count crashes and bugs')
33    crash_count = sum(1 for _ in read_crashes(args.output))
34    bug_counter = create_counters()
35    for bug in read_bugs(args.output, html_reports_available):
36        bug_counter(bug)
37    result = crash_count + bug_counter.total
38
39    if html_reports_available and result:
40        use_cdb = os.path.exists(args.cdb)
41
42        logging.debug('generate index.html file')
43        # common prefix for source files to have sorter path
        prefix = commonprefix_from(args.cdb) if use_cdb else os.getcwd()
        # assemble the cover from multiple fragments
        fragments = []
        try:
            if bug_counter.total:
                fragments.append(bug_summary(args.output, bug_counter))
                fragments.append(bug_report(args.output, prefix))
            if crash_count:
                fragments.append(crash_report(args.output, prefix))
            assemble_cover(args, prefix, fragments)
            # copy additional files to the report
            copy_resource_files(args.output)
            if use_cdb:
                shutil.copy(args.cdb, args.output)
        finally:
            for fragment in fragments:
                os.remove(fragment)

    if sarif_reports_available:
        logging.debug('merging sarif files')
        merge_sarif_files(args.output)

    return result


def assemble_cover(args, prefix, fragments):
    """ Put together the fragments into a final report. """

    import getpass
    import socket

    if args.html_title is None:
        args.html_title = os.path.basename(prefix) + ' - analyzer results'

    with open(os.path.join(args.output, 'index.html'), 'w') as handle:
        indent = 0
        handle.write(reindent("""
        |<!DOCTYPE html>
        |<html>
        |  <head>
        |    <title>{html_title}</title>
        |    <link type="text/css" rel="stylesheet" href="scanview.css"/>
        |    <script type='text/javascript' src="sorttable.js"></script>
        |    <script type='text/javascript' src='selectable.js'></script>
        |  </head>""", indent).format(html_title=args.html_title))
        handle.write(comment('SUMMARYENDHEAD'))
        handle.write(reindent("""
        |  <body>
        |    <h1>{html_title}</h1>
        |    <table>
        |      <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
        |      <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
        |      <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
        |      <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
        |      <tr><th>Date:</th><td>{date}</td></tr>
        |    </table>""", indent).format(html_title=args.html_title,
                                         user_name=getpass.getuser(),
                                         host_name=socket.gethostname(),
                                         current_dir=prefix,
                                         cmd_args=' '.join(sys.argv),
                                         clang_version=get_version(args.clang),
                                         date=datetime.datetime.today(
                                         ).strftime('%c')))
        for fragment in fragments:
            # copy the content of fragments
            with open(fragment, 'r') as input_handle:
                shutil.copyfileobj(input_handle, handle)
        handle.write(reindent("""
        |  </body>
        |</html>""", indent))


def bug_summary(output_dir, bug_counter):
117    """ Bug summary is a HTML table to give a better overview of the bugs. """

    name = os.path.join(output_dir, 'summary.html.fragment')
    with open(name, 'w') as handle:
        indent = 4
        handle.write(reindent("""
        |<h2>Bug Summary</h2>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Bug Type</td>
        |      <td>Quantity</td>
        |      <td class="sorttable_nosort">Display?</td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent))
        handle.write(reindent("""
        |    <tr style="font-weight:bold">
        |      <td class="SUMM_DESC">All Bugs</td>
        |      <td class="Q">{0}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox" id="AllBugsCheck"
        |                 onClick="CopyCheckedStateToCheckButtons(this);"/>
        |        </center>
        |      </td>
        |    </tr>""", indent).format(bug_counter.total))
        for category, types in bug_counter.categories.items():
            handle.write(reindent("""
        |    <tr>
        |      <th>{0}</th><th colspan=2></th>
        |    </tr>""", indent).format(category))
            for bug_type in types.values():
                handle.write(reindent("""
        |    <tr>
        |      <td class="SUMM_DESC">{bug_type}</td>
        |      <td class="Q">{bug_count}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox"
        |                 onClick="ToggleDisplay(this,'{bug_type_class}');"/>
        |        </center>
        |      </td>
        |    </tr>""", indent).format(**bug_type))
        handle.write(reindent("""
        |  </tbody>
        |</table>""", indent))
        handle.write(comment('SUMMARYBUGEND'))
    return name


def bug_report(output_dir, prefix):
    """ Creates a fragment from the analyzer reports. """

    pretty = prettify_bug(prefix, output_dir)
    bugs = (pretty(bug) for bug in read_bugs(output_dir, True))

    name = os.path.join(output_dir, 'bugs.html.fragment')
    with open(name, 'w') as handle:
        indent = 4
        handle.write(reindent("""
        |<h2>Reports</h2>
        |<table class="sortable" style="table-layout:automatic">
        |  <thead>
        |    <tr>
        |      <td>Bug Group</td>
        |      <td class="sorttable_sorted">
        |        Bug Type
        |        <span id="sorttable_sortfwdind">&nbsp;&#x25BE;</span>
        |      </td>
        |      <td>File</td>
        |      <td>Function/Method</td>
        |      <td class="Q">Line</td>
        |      <td class="Q">Path Length</td>
        |      <td class="sorttable_nosort"></td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent))
        handle.write(comment('REPORTBUGCOL'))
        for current in bugs:
            handle.write(reindent("""
        |    <tr class="{bug_type_class}">
        |      <td class="DESC">{bug_category}</td>
        |      <td class="DESC">{bug_type}</td>
        |      <td>{bug_file}</td>
        |      <td class="DESC">{bug_function}</td>
        |      <td class="Q">{bug_line}</td>
        |      <td class="Q">{bug_path_length}</td>
        |      <td><a href="{report_file}#EndPath">View Report</a></td>
        |    </tr>""", indent).format(**current))
            handle.write(comment('REPORTBUG', {'id': current['report_file']}))
        handle.write(reindent("""
        |  </tbody>
        |</table>""", indent))
        handle.write(comment('REPORTBUGEND'))
    return name


def crash_report(output_dir, prefix):
    """ Creates a fragment from the compiler crashes. """

    pretty = prettify_crash(prefix, output_dir)
    crashes = (pretty(crash) for crash in read_crashes(output_dir))

    name = os.path.join(output_dir, 'crashes.html.fragment')
    with open(name, 'w') as handle:
        indent = 4
        handle.write(reindent("""
        |<h2>Analyzer Failures</h2>
        |<p>The analyzer had problems processing the following files:</p>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Problem</td>
        |      <td>Source File</td>
        |      <td>Preprocessed File</td>
        |      <td>STDERR Output</td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent))
        for current in crashes:
            handle.write(reindent("""
        |    <tr>
        |      <td>{problem}</td>
        |      <td>{source}</td>
        |      <td><a href="{file}">preprocessor output</a></td>
        |      <td><a href="{stderr}">analyzer std err</a></td>
        |    </tr>""", indent).format(**current))
            handle.write(comment('REPORTPROBLEM', current))
        handle.write(reindent("""
        |  </tbody>
        |</table>""", indent))
        handle.write(comment('REPORTCRASHES'))
    return name


def read_crashes(output_dir):
254    """ Generate a unique sequence of crashes from given output directory. """
255
256    return (parse_crash(filename)
257            for filename in glob.iglob(os.path.join(output_dir, 'failures',
258                                                    '*.info.txt')))
259
260
261def read_bugs(output_dir, html):
262    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
263    """ Generate a unique sequence of bugs from given output directory.
264
265    Duplicates can be in a project if the same module was compiled multiple
266    times with different compiler options. These would be better to show in
267    the final report (cover) only once. """
268
269    def empty(file_name):
270        return os.stat(file_name).st_size == 0
271
272    duplicate = duplicate_check(
273        lambda bug: '{bug_line}.{bug_path_length}:{bug_file}'.format(**bug))
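    # Illustrative sketch of the deduplication key: a bug such as
    #   {'bug_line': 42, 'bug_path_length': 3, 'bug_file': '/src/main.c', ...}
    # maps to '42.3:/src/main.c', so a second report with the same key is
    # silently skipped.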

    # get the right parser for the job.
    parser = parse_bug_html if html else parse_bug_plist
    # get the input files, which are not empty.
    pattern = os.path.join(output_dir, '*.html' if html else '*.plist')
    bug_files = (file for file in glob.iglob(pattern) if not empty(file))

    for bug_file in bug_files:
        for bug in parser(bug_file):
            if not duplicate(bug):
                yield bug


def merge_sarif_files(output_dir, sort_files=False):
    """ Reads and merges all .sarif files in the given output directory.

    Each SARIF file in the output directory is treated as a single run and
    therefore appears as a separate entry in the top-level 'runs' array.
    This requires updating the run index of any embedded links in messages.
    """

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    def update_sarif_object(sarif_object, runs_count_offset):
        """
        Given a SARIF object, checks its dictionary entries for a 'message'
        property. If one exists, updates the run index of any embedded links
        in that message.

        Recursively looks through entries in the dictionary.
        """
        if not isinstance(sarif_object, dict):
            return sarif_object

        if 'message' in sarif_object:
            sarif_object['message'] = match_and_update_run(sarif_object['message'], runs_count_offset)

        for key in sarif_object:
            if isinstance(sarif_object[key], list):
                # iterate through the sub-objects and update each of them.
                arr = [update_sarif_object(entry, runs_count_offset) for entry in sarif_object[key]]
                sarif_object[key] = arr
            elif isinstance(sarif_object[key], dict):
                sarif_object[key] = update_sarif_object(sarif_object[key], runs_count_offset)
            else:
                # do nothing
                pass

        return sarif_object


    def match_and_update_run(message, runs_count_offset):
        """
        Given a SARIF message object, checks if the 'text' property contains
        an embedded link and updates its run index if necessary.
        """
        if 'text' not in message:
            return message

        # we only merge runs, so we only need to update the run index
        pattern = re.compile(r'sarif:/runs/(\d+)')

        text = message['text']
        matches = re.finditer(pattern, text)
        matches_list = list(matches)

        # update matches from right to left, so earlier match offsets stay
        # valid even when the replacement grows in length (e.g. 9 -> 10)
        for idx in range(len(matches_list) - 1, -1, -1):
            match = matches_list[idx]
            new_run_count = str(runs_count_offset + int(match.group(1)))
            text = text[0:match.start(1)] + new_run_count + text[match.end(1):]

        message['text'] = text
        return message
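    # Worked example (illustrative): with runs_count_offset == 2 the message
    # text 'see sarif:/runs/0/results/0' becomes 'see sarif:/runs/2/results/0',
    # so embedded links keep pointing at the right entry of the merged 'runs'.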

    sarif_files = (file for file in glob.iglob(os.path.join(output_dir, '*.sarif')) if not empty(file))
    # exposed for testing since the order of files returned by glob is not guaranteed to be sorted
    if sort_files:
        sarif_files = list(sarif_files)
        sarif_files.sort()

    runs_count = 0
    merged = {}
    for sarif_file in sarif_files:
        with open(sarif_file) as fp:
            sarif = json.load(fp)
            if 'runs' not in sarif:
                continue

            # start with the first file
            if not merged:
                merged = sarif
            else:
                # extract the run and append it to the merged output
                for run in sarif['runs']:
                    new_run = update_sarif_object(run, runs_count)
                    merged['runs'].append(new_run)

            runs_count += len(sarif['runs'])

    with open(os.path.join(output_dir, 'results-merged.sarif'), 'w') as out:
        json.dump(merged, out, indent=4, sort_keys=True)


def parse_bug_plist(filename):
    """ Returns the generator of bugs from a single .plist file. """

    with open(filename, 'rb') as fp:
        content = plistlib.load(fp)
        files = content.get('files')
        for bug in content.get('diagnostics', []):
            if len(files) <= int(bug['location']['file']):
                logging.warning('Parsing bug from "%s" failed', filename)
                continue

            yield {
                'result': filename,
                'bug_type': bug['type'],
                'bug_category': bug['category'],
                'bug_line': int(bug['location']['line']),
                'bug_path_length': int(bug['location']['col']),
                'bug_file': files[int(bug['location']['file'])]
            }


def parse_bug_html(filename):
    """ Parse out the bug information from HTML output. """

    patterns = [re.compile(r'<!-- BUGTYPE (?P<bug_type>.*) -->$'),
                re.compile(r'<!-- BUGFILE (?P<bug_file>.*) -->$'),
                re.compile(r'<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$'),
                re.compile(r'<!-- BUGLINE (?P<bug_line>.*) -->$'),
                re.compile(r'<!-- BUGCATEGORY (?P<bug_category>.*) -->$'),
                re.compile(r'<!-- BUGDESC (?P<bug_description>.*) -->$'),
                re.compile(r'<!-- FUNCTIONNAME (?P<bug_function>.*) -->$')]
    endsign = re.compile(r'<!-- BUGMETAEND -->')
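    # The analyzer's HTML output embeds the bug meta data as comments; an
    # illustrative header (the values are made up) looks like:
    #   <!-- BUGTYPE Dereference of null pointer -->
    #   <!-- BUGLINE 42 -->
    #   <!-- BUGMETAEND -->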

    bug = {
        'report_file': filename,
        'bug_function': 'n/a',  # compatibility with < clang-3.5
        'bug_category': 'Other',
        'bug_line': 0,
        'bug_path_length': 1
    }

    with open(filename) as handler:
        for line in handler.readlines():
            # stop reading once the bug meta data block has ended
            if endsign.match(line):
                break
            # search for the right lines
            for regex in patterns:
                match = regex.match(line.strip())
                if match:
                    bug.update(match.groupdict())
                    break

    encode_value(bug, 'bug_line', int)
    encode_value(bug, 'bug_path_length', int)

    yield bug


def parse_crash(filename):
    """ Parse out the crash information from the report file. """

    match = re.match(r'(.*)\.info\.txt', filename)
    name = match.group(1) if match else None
    with open(filename, mode='rb') as handler:
        # read in binary mode and strip the line endings manually; this works
        # around Windows '\r\n' line endings showing up in the parsed values.
        lines = [line.decode().rstrip() for line in handler.readlines()]
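        # Expected layout of the '*.info.txt' file (illustrative): line 0 holds
        # the path of the analyzed source file and line 1 a short description
        # of the problem; the remaining lines are not used here.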
        return {
            'source': lines[0],
            'problem': lines[1],
            'file': name,
            'info': name + '.info.txt',
            'stderr': name + '.stderr.txt'
        }


def category_type_name(bug):
    """ Create a new bug attribute from the bug's category and type.

    The result is used as a CSS class selector in the final report. """

    def smash(key):
        """ Make the value usable as an HTML attribute value. """

        return bug.get(key, '').lower().replace(' ', '_').replace("'", '')

    return escape('bt_' + smash('bug_category') + '_' + smash('bug_type'))
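# Illustrative sketch: a bug with category 'Logic error' and type
# 'Dereference of null pointer' is mapped to the CSS class name
# 'bt_logic_error_dereference_of_null_pointer'.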


def create_counters():
    """ Create counters for bug statistics.

    Two attributes are maintained on the returned counter: 'total' is an
    integer that holds the number of bugs, and 'categories' is a two-level
    categorisation of bug counters. The first level is the bug category,
    the second is the bug type. Each entry in this classification is a
    dictionary with 'bug_type', 'bug_type_class' and 'bug_count' keys. """

    def predicate(bug):
        bug_category = bug['bug_category']
        bug_type = bug['bug_type']
        current_category = predicate.categories.get(bug_category, dict())
        current_type = current_category.get(bug_type, {
            'bug_type': bug_type,
            'bug_type_class': category_type_name(bug),
            'bug_count': 0
        })
        current_type.update({'bug_count': current_type['bug_count'] + 1})
        current_category.update({bug_type: current_type})
        predicate.categories.update({bug_category: current_category})
        predicate.total += 1

    predicate.total = 0
    predicate.categories = dict()
    return predicate
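# A minimal usage sketch (illustrative values):
#
#   counter = create_counters()
#   counter({'bug_category': 'Logic error',
#            'bug_type': 'Dereference of null pointer'})
#   counter.total       # -> 1
#   counter.categories  # -> {'Logic error': {'Dereference of null pointer':
#                       #      {'bug_type': '...', 'bug_type_class': '...',
#                       #       'bug_count': 1}}}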


def prettify_bug(prefix, output_dir):
    def predicate(bug):
        """ Make these values safe to embed into HTML. """

        bug['bug_type_class'] = category_type_name(bug)

        encode_value(bug, 'bug_file', lambda x: escape(chop(prefix, x)))
        encode_value(bug, 'bug_category', escape)
        encode_value(bug, 'bug_type', escape)
        encode_value(bug, 'report_file', lambda x: escape(chop(output_dir, x)))
        return bug

    return predicate


def prettify_crash(prefix, output_dir):
    def predicate(crash):
        """ Make these values safe to embed into HTML. """

        encode_value(crash, 'source', lambda x: escape(chop(prefix, x)))
        encode_value(crash, 'problem', escape)
        encode_value(crash, 'file', lambda x: escape(chop(output_dir, x)))
        encode_value(crash, 'info', lambda x: escape(chop(output_dir, x)))
        encode_value(crash, 'stderr', lambda x: escape(chop(output_dir, x)))
        return crash

    return predicate


def copy_resource_files(output_dir):
    """ Copy the JavaScript and CSS files to the report directory. """

    this_dir = os.path.dirname(os.path.realpath(__file__))
    for resource in os.listdir(os.path.join(this_dir, 'resources')):
        shutil.copy(os.path.join(this_dir, 'resources', resource), output_dir)


def encode_value(container, key, encode):
    """ Run 'encode' on the value of 'container[key]' and update it. """

    if key in container:
        value = encode(container[key])
        container.update({key: value})


def chop(prefix, filename):
    """ Create 'filename' from '/prefix/filename' """

    return filename if not len(prefix) else os.path.relpath(filename, prefix)
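# Illustrative sketch: chop('/prefix', '/prefix/filename') returns 'filename',
# while an empty prefix leaves the path untouched.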


def escape(text):
    """ Paranoid HTML escape method. (Python version independent) """

    escape_table = {
        '&': '&amp;',
        '"': '&quot;',
        "'": '&apos;',
        '>': '&gt;',
        '<': '&lt;'
    }
    return ''.join(escape_table.get(c, c) for c in text)
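# Illustrative sketch: escape('<a href="x">') returns
# '&lt;a href=&quot;x&quot;&gt;'.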


def reindent(text, indent):
    """ Utility function to format HTML output and keep indentation. """

    result = ''
    for line in text.splitlines():
        if len(line.strip()):
            result += ' ' * indent + line.split('|')[1] + os.linesep
    return result
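# Illustrative sketch: every non-empty line is stripped up to and including
# its first '|' and prefixed with 'indent' spaces, so
#
#   reindent("""
#   |<html>
#   |  <body/>
#   |</html>""", 2)
#
# yields '  <html>', '    <body/>' and '  </html>', joined with os.linesep.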


def comment(name, opts=dict()):
    """ Utility function to format meta information as an HTML comment. """

    attributes = ''
    for key, value in opts.items():
        attributes += ' {0}="{1}"'.format(key, value)

    return '<!-- {0}{1} -->{2}'.format(name, attributes, os.linesep)
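# Illustrative sketch: comment('REPORTBUG', {'id': 'report-1.html'}) returns
# '<!-- REPORTBUG id="report-1.html" -->' followed by os.linesep.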


def commonprefix_from(filename):
    """ Create a common file prefix from the compilation database entries. """

    with open(filename, 'r') as handle:
        return commonprefix(item['file'] for item in json.load(handle))


def commonprefix(files):
    """ Fixed version of os.path.commonprefix.

    :param files: list of file names.
    :return: the longest path prefix that is a prefix of all files. """
    result = None
    for current in files:
        if result is not None:
            result = os.path.commonprefix([result, current])
        else:
            result = current

    if result is None:
        return ''
    elif not os.path.isdir(result):
        return os.path.dirname(result)
    else:
        return os.path.abspath(result)
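# Illustrative sketch of the difference from os.path.commonprefix: for
# ['/src/abc/x.c', '/src/abd/y.c'] the character-wise common prefix is
# '/src/ab', which is usually not an existing directory, so the result is
# chopped back to '/src' via os.path.dirname.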