1# -*- coding: utf-8 -*-
2# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3# See https://llvm.org/LICENSE.txt for license information.
4# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for generating 'index.html' for the report.

The input for this step is the output directory, where the individual reports
can be found. It parses those reports and generates 'index.html'. """
9
10import re
11import os
12import os.path
13import sys
14import shutil
15import plistlib
16import glob
17import json
18import logging
19import datetime
20from libscanbuild import duplicate_check
21from libscanbuild.clang import get_version
22
23__all__ = ['document']
24
25
def document(args):
    """ Build the cover page of the report and count what was found.

    The crashes and the distinct bugs under 'args.output' are counted
    first. When the output format includes HTML and at least one crash
    or bug was found, 'index.html' is assembled from temporary fragment
    files, the resource files are copied next to it and, if the
    compilation database 'args.cdb' exists, it is copied there too.

    :param args: parsed command line arguments; 'output', 'output_format'
                 and 'cdb' are read here.
    :return: the number of crashes plus the number of bugs. """

    with_html = args.output_format in {'html', 'plist-html'}

    logging.debug('count crashes and bugs')
    crash_count = sum(1 for _ in read_crashes(args.output))
    counters = create_counters()
    for bug in read_bugs(args.output, with_html):
        counters(bug)
    result = crash_count + counters.total

    # nothing to render: no HTML output requested, or nothing was found
    if not with_html or not result:
        return result

    use_cdb = os.path.exists(args.cdb)

    logging.debug('generate index.html file')
    # common prefix of the source files, used to display shorter paths
    if use_cdb:
        prefix = commonprefix_from(args.cdb)
    else:
        prefix = os.getcwd()

    # the cover is assembled from multiple fragment files
    fragments = []
    try:
        if counters.total:
            fragments.append(bug_summary(args.output, counters))
            fragments.append(bug_report(args.output, prefix))
        if crash_count:
            fragments.append(crash_report(args.output, prefix))
        assemble_cover(args, prefix, fragments)
        # copy additional files to the report
        copy_resource_files(args.output)
        if use_cdb:
            shutil.copy(args.cdb, args.output)
    finally:
        # fragments are only intermediate files, remove them even on error
        for fragment in fragments:
            os.remove(fragment)
    return result
61
62
def assemble_cover(args, prefix, fragments):
    """ Write 'index.html' by wrapping the fragments into a HTML page.

    When 'args.html_title' is None it is set (on 'args' itself) to the
    base name of 'prefix' followed by ' - analyzer results'.

    :param args: parsed command line arguments; 'html_title', 'output'
                 and 'clang' are read here.
    :param prefix: directory shown as the working directory on the cover.
    :param fragments: names of the files whose content is copied, in
                      order, into the body of the page. """

    import getpass
    import socket

    if args.html_title is None:
        args.html_title = os.path.basename(prefix) + ' - analyzer results'

    indent = 0
    head_template = reindent("""
        |<!DOCTYPE html>
        |<html>
        |  <head>
        |    <title>{html_title}</title>
        |    <link type="text/css" rel="stylesheet" href="scanview.css"/>
        |    <script type='text/javascript' src="sorttable.js"></script>
        |    <script type='text/javascript' src='selectable.js'></script>
        |  </head>""", indent)
    info_template = reindent("""
        |  <body>
        |    <h1>{html_title}</h1>
        |    <table>
        |      <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
        |      <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
        |      <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
        |      <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
        |      <tr><th>Date:</th><td>{date}</td></tr>
        |    </table>""", indent)
    closing = reindent("""
        |  </body>
        |</html>""", indent)

    cover = os.path.join(args.output, 'index.html')
    with open(cover, 'w') as handle:
        handle.write(head_template.format(html_title=args.html_title))
        handle.write(comment('SUMMARYENDHEAD'))
        now = datetime.datetime.today()
        handle.write(info_template.format(
            html_title=args.html_title,
            user_name=getpass.getuser(),
            host_name=socket.gethostname(),
            current_dir=prefix,
            cmd_args=' '.join(sys.argv),
            clang_version=get_version(args.clang),
            date=now.strftime('%c')))
        # the fragments are copied verbatim, in the order they were given
        for fragment in fragments:
            with open(fragment, 'r') as source:
                shutil.copyfileobj(source, handle)
        handle.write(closing)
108
109
def bug_summary(output_dir, bug_counter):
    """ Write the bug summary table into a fragment file.

    The table has one row for the total, one header row per bug category
    and one row per bug type with its count and a display checkbox.

    :param output_dir: directory where 'summary.html.fragment' is created.
    :param bug_counter: object with 'total' and 'categories' attributes,
                        as returned by 'create_counters'.
    :return: the name of the fragment file. """

    indent = 4
    table_open = reindent("""
        |<h2>Bug Summary</h2>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Bug Type</td>
        |      <td>Quantity</td>
        |      <td class="sorttable_nosort">Display?</td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent)
    total_row = reindent("""
        |    <tr style="font-weight:bold">
        |      <td class="SUMM_DESC">All Bugs</td>
        |      <td class="Q">{0}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox" id="AllBugsCheck"
        |                 onClick="CopyCheckedStateToCheckButtons(this);"/>
        |        </center>
        |      </td>
        |    </tr>""", indent)
    category_row = reindent("""
        |    <tr>
        |      <th>{0}</th><th colspan=2></th>
        |    </tr>""", indent)
    type_row = reindent("""
        |    <tr>
        |      <td class="SUMM_DESC">{bug_type}</td>
        |      <td class="Q">{bug_count}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox"
        |                 onClick="ToggleDisplay(this,'{bug_type_class}');"/>
        |        </center>
        |      </td>
        |    </tr>""", indent)
    table_close = reindent("""
        |  </tbody>
        |</table>""", indent)

    name = os.path.join(output_dir, 'summary.html.fragment')
    with open(name, 'w') as handle:
        handle.write(table_open)
        handle.write(total_row.format(bug_counter.total))
        for category, types in bug_counter.categories.items():
            handle.write(category_row.format(category))
            for entry in types.values():
                handle.write(type_row.format(**entry))
        handle.write(table_close)
        handle.write(comment('SUMMARYBUGEND'))
    return name
160
161
def bug_report(output_dir, prefix):
    """ Write the table of the individual bug reports into a fragment file.

    The bugs are read from the HTML reports in 'output_dir' and made safe
    for HTML by 'prettify_bug' before their row is written.

    :param output_dir: directory of the reports, also where the fragment
                       'bugs.html.fragment' is created.
    :param prefix: path prefix handed to 'prettify_bug'.
    :return: the name of the fragment file. """

    indent = 4
    table_open = reindent("""
        |<h2>Reports</h2>
        |<table class="sortable" style="table-layout:automatic">
        |  <thead>
        |    <tr>
        |      <td>Bug Group</td>
        |      <td class="sorttable_sorted">
        |        Bug Type
        |        <span id="sorttable_sortfwdind">&nbsp;&#x25BE;</span>
        |      </td>
        |      <td>File</td>
        |      <td>Function/Method</td>
        |      <td class="Q">Line</td>
        |      <td class="Q">Path Length</td>
        |      <td class="sorttable_nosort"></td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent)
    bug_row = reindent("""
        |    <tr class="{bug_type_class}">
        |      <td class="DESC">{bug_category}</td>
        |      <td class="DESC">{bug_type}</td>
        |      <td>{bug_file}</td>
        |      <td class="DESC">{bug_function}</td>
        |      <td class="Q">{bug_line}</td>
        |      <td class="Q">{bug_path_length}</td>
        |      <td><a href="{report_file}#EndPath">View Report</a></td>
        |    </tr>""", indent)
    table_close = reindent("""
        |  </tbody>
        |</table>""", indent)

    pretty = prettify_bug(prefix, output_dir)
    raw_bugs = read_bugs(output_dir, True)

    name = os.path.join(output_dir, 'bugs.html.fragment')
    with open(name, 'w') as handle:
        handle.write(table_open)
        handle.write(comment('REPORTBUGCOL'))
        for raw in raw_bugs:
            current = pretty(raw)
            handle.write(bug_row.format(**current))
            # every row is followed by a marker naming its report file
            handle.write(comment('REPORTBUG', {'id': current['report_file']}))
        handle.write(table_close)
        handle.write(comment('REPORTBUGEND'))
    return name
207
208
def crash_report(output_dir, prefix):
    """ Write the table of the analyzer failures into a fragment file.

    The crashes are read from 'output_dir' and made safe for HTML by
    'prettify_crash' before their row is written.

    :param output_dir: directory of the reports, also where the fragment
                       'crashes.html.fragment' is created.
    :param prefix: path prefix handed to 'prettify_crash'.
    :return: the name of the fragment file. """

    indent = 4
    table_open = reindent("""
        |<h2>Analyzer Failures</h2>
        |<p>The analyzer had problems processing the following files:</p>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Problem</td>
        |      <td>Source File</td>
        |      <td>Preprocessed File</td>
        |      <td>STDERR Output</td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent)
    crash_row = reindent("""
        |    <tr>
        |      <td>{problem}</td>
        |      <td>{source}</td>
        |      <td><a href="{file}">preprocessor output</a></td>
        |      <td><a href="{stderr}">analyzer std err</a></td>
        |    </tr>""", indent)
    table_close = reindent("""
        |  </tbody>
        |</table>""", indent)

    pretty = prettify_crash(prefix, output_dir)
    raw_crashes = read_crashes(output_dir)

    name = os.path.join(output_dir, 'crashes.html.fragment')
    with open(name, 'w') as handle:
        handle.write(table_open)
        for raw in raw_crashes:
            current = pretty(raw)
            handle.write(crash_row.format(**current))
            # all the attributes of the crash go into the marker comment
            handle.write(comment('REPORTPROBLEM', current))
        handle.write(table_close)
        handle.write(comment('REPORTCRASHES'))
    return name
245
246
def read_crashes(output_dir):
    """ Generate the crashes found in the given output directory.

    Every '*.info.txt' file in the 'failures' sub-directory is parsed
    with 'parse_crash'.

    :param output_dir: the directory of the reports.
    :return: a generator of the parsed crashes. """

    pattern = os.path.join(output_dir, 'failures', '*.info.txt')
    return (parse_crash(info_file) for info_file in glob.iglob(pattern))
253
254
def read_bugs(output_dir, html):
    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
    """ Generate a unique sequence of bugs from given output directory.

    Duplicates can be in a project if the same module was compiled multiple
    times with different compiler options. These would be better to show in
    the final report (cover) only once.

    :param output_dir: the directory of the reports.
    :param html: when true the '*.html' reports are parsed, otherwise
                 the '*.plist' ones.
    :return: the bugs which are not flagged by the duplicate check. """

    def bug_id(bug):
        # line, path length and file together identify a bug
        return '{bug_line}.{bug_path_length}:{bug_file}'.format(**bug)

    is_duplicate = duplicate_check(bug_id)

    # pick the parser and the matching file pattern together
    if html:
        parser, extension = parse_bug_html, '*.html'
    else:
        parser, extension = parse_bug_plist, '*.plist'

    for report in glob.iglob(os.path.join(output_dir, extension)):
        # empty report files are skipped
        if os.stat(report).st_size == 0:
            continue
        for bug in parser(report):
            if is_duplicate(bug):
                continue
            yield bug
279
280
def parse_bug_plist(filename):
    """ Returns the generator of bugs from a single .plist file.

    A diagnostic whose file index does not point into the 'files' list of
    the report is logged and skipped.

    :param filename: name of the .plist report.
    :return: generator of dictionaries with the keys 'result', 'bug_type',
             'bug_category', 'bug_line', 'bug_path_length' and 'bug_file'. """

    # 'plistlib.readPlist' was removed in Python 3.9, so prefer 'load' and
    # keep the old call only for interpreters which do not have it.
    if hasattr(plistlib, 'load'):
        with open(filename, 'rb') as handle:
            content = plistlib.load(handle)
    else:
        content = plistlib.readPlist(filename)

    # a report without a 'files' entry has no valid file index at all
    files = content.get('files') or []
    for bug in content.get('diagnostics', []):
        location = bug['location']
        file_index = int(location['file'])
        # a negative index would silently wrap around to a wrong file
        if not 0 <= file_index < len(files):
            logging.warning('Parsing bug from "%s" failed', filename)
            continue

        yield {
            'result': filename,
            'bug_type': bug['type'],
            'bug_category': bug['category'],
            'bug_line': int(location['line']),
            # NOTE(review): the column is stored as the path length here;
            # confirm this is intended.
            'bug_path_length': int(location['col']),
            'bug_file': files[file_index]
        }
299
300
def parse_bug_html(filename):
    """ Parse out the bug information from HTML output.

    The attributes are read from the '<!-- NAME value -->' comment lines
    of the report. Reading stops at the 'BUGMETAEND' marker, so the rest
    of the report is never loaded.

    :param filename: name of the HTML report.
    :return: generator of a single dictionary; 'bug_line' and
             'bug_path_length' are converted to integers. """

    patterns = [re.compile(r'<!-- BUGTYPE (?P<bug_type>.*) -->$'),
                re.compile(r'<!-- BUGFILE (?P<bug_file>.*) -->$'),
                re.compile(r'<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$'),
                re.compile(r'<!-- BUGLINE (?P<bug_line>.*) -->$'),
                re.compile(r'<!-- BUGCATEGORY (?P<bug_category>.*) -->$'),
                re.compile(r'<!-- BUGDESC (?P<bug_description>.*) -->$'),
                re.compile(r'<!-- FUNCTIONNAME (?P<bug_function>.*) -->$')]
    endsign = re.compile(r'<!-- BUGMETAEND -->')

    # defaults for the attributes which might be missing from the report
    bug = {
        'report_file': filename,
        'bug_function': 'n/a',  # compatibility with < clang-3.5
        'bug_category': 'Other',
        'bug_line': 0,
        'bug_path_length': 1
    }

    with open(filename) as handler:
        # iterate lazily: 'readlines' would load the whole report although
        # only the lines before the end sign are needed
        for line in handler:
            # do not read the file further
            if endsign.match(line):
                break
            # search for the right lines
            stripped = line.strip()
            for regex in patterns:
                match = regex.match(stripped)
                if match:
                    bug.update(match.groupdict())
                    break

    # both keys always exist because of the defaults above
    bug['bug_line'] = int(bug['bug_line'])
    bug['bug_path_length'] = int(bug['bug_path_length'])

    yield bug
337
338
def parse_crash(filename):
    """ Parse out the crash information from the report file.

    The first line of the file is the source, the second is the problem.
    The other values are file names derived from 'filename' with its
    '.info.txt' ending removed.

    :param filename: name of a '*.info.txt' crash report.
    :return: dictionary with the keys 'source', 'problem', 'file',
             'info' and 'stderr'. """

    name = None
    match = re.match(r'(.*)\.info\.txt', filename)
    if match:
        name = match.group(1)

    lines = []
    with open(filename, mode='rb') as handler:
        # read as bytes and strip the line end by hand, this is a
        # workaround to fix windows read '\r\n' as new lines.
        for raw in handler.readlines():
            lines.append(raw.decode().rstrip())

    return {
        'source': lines[0],
        'problem': lines[1],
        'file': name,
        'info': name + '.info.txt',
        'stderr': name + '.stderr.txt'
    }
354
355
def category_type_name(bug):
    """ Create a new bug attribute from bug by category and type.

    The result will be used as CSS class selector in the final report.

    :param bug: dictionary; 'bug_category' and 'bug_type' are read, a
                missing key counts as empty text.
    :return: 'bt_<category>_<type>' passed through 'escape'. """

    def smash(key):
        """ Make value ready to be HTML attribute value. """

        lowered = bug.get(key, '').lower()
        # spaces become underscores, apostrophes are dropped
        return lowered.replace(' ', '_').replace("'", '')

    parts = ['bt', smash('bug_category'), smash('bug_type')]
    return escape('_'.join(parts))
367
368
def create_counters():
    """ Create counters for bug statistics.

    The returned function is called with a bug and keeps the statistics
    on itself as two attributes: 'total' is the number of bugs counted,
    'categories' is a two level dictionary, bug category first and bug
    type second. Each entry of it is a dictionary with the keys
    'bug_type', 'bug_type_class' and 'bug_count'. """

    def count(bug):
        category_name = bug['bug_category']
        type_name = bug['bug_type']
        # the entry to start from when this bug type was not seen yet
        fresh = {
            'bug_type': type_name,
            'bug_type_class': category_type_name(bug),
            'bug_count': 0
        }
        types = count.categories.setdefault(category_name, dict())
        entry = types.setdefault(type_name, fresh)
        entry['bug_count'] += 1
        count.total += 1

    count.total = 0
    count.categories = dict()
    return count
395
396
def prettify_bug(prefix, output_dir):
    """ Create a function which makes a bug ready for the HTML cover.

    :param prefix: directory the 'bug_file' value is made relative to.
    :param output_dir: directory the 'report_file' value is made
                       relative to.
    :return: function which updates the bug in place and returns it. """

    def source_path(path):
        return escape(chop(prefix, path))

    def report_path(path):
        return escape(chop(output_dir, path))

    def predicate(bug):
        """ Make safe this values to embed into HTML. """

        # the CSS class is computed from the values before they are escaped
        bug['bug_type_class'] = category_type_name(bug)

        encoders = (('bug_file', source_path),
                    ('bug_category', escape),
                    ('bug_type', escape),
                    ('report_file', report_path))
        for key, encode in encoders:
            encode_value(bug, key, encode)
        return bug

    return predicate
410
411
def prettify_crash(prefix, output_dir):
    """ Create a function which makes a crash ready for the HTML cover.

    :param prefix: directory the 'source' value is made relative to.
    :param output_dir: directory the 'file', 'info' and 'stderr' values
                       are made relative to.
    :return: function which updates the crash in place and returns it. """

    def source_path(path):
        return escape(chop(prefix, path))

    def report_path(path):
        return escape(chop(output_dir, path))

    def predicate(crash):
        """ Make safe this values to embed into HTML. """

        encode_value(crash, 'source', source_path)
        encode_value(crash, 'problem', escape)
        for key in ('file', 'info', 'stderr'):
            encode_value(crash, key, report_path)
        return crash

    return predicate
424
425
def copy_resource_files(output_dir):
    """ Copy the javascript and css files to the report directory.

    The files are taken from the 'resources' directory, which is next to
    this module.

    :param output_dir: the directory to copy the files into. """

    module_dir = os.path.dirname(os.path.realpath(__file__))
    resource_dir = os.path.join(module_dir, 'resources')
    for entry in os.listdir(resource_dir):
        shutil.copy(os.path.join(resource_dir, entry), output_dir)
432
433
def encode_value(container, key, encode):
    """ Replace 'container[key]' with the result of 'encode' on it.

    Nothing happens when the key is not in the container. """

    if key not in container:
        return
    encoded = encode(container[key])
    container.update({key: encoded})
440
441
def chop(prefix, filename):
    """ Create 'filename' from '/prefix/filename'

    The file name is returned unchanged when the prefix is empty,
    otherwise it is made relative to the prefix. """

    if len(prefix) == 0:
        return filename
    return os.path.relpath(filename, prefix)
446
447
def escape(text):
    """ Paranoid HTML escape method. (Python version independent)

    The characters '&', '"', "'", '>' and '<' are replaced by their
    HTML entity, every other character is kept as it is. """

    entities = {
        '&': '&amp;',
        '"': '&quot;',
        "'": '&apos;',
        '>': '&gt;',
        '<': '&lt;'
    }
    pieces = [entities[char] if char in entities else char for char in text]
    return ''.join(pieces)
459
460
def reindent(text, indent):
    """ Utility function to format html output and keep indentation.

    Every non blank line of 'text' has to contain a '|' margin marker.
    What is before the first marker is dropped, what is after it is kept
    (further '|' characters included) behind 'indent' spaces.

    :param text: the template, one '|' marked line per output line.
    :param indent: the number of spaces to put before every line.
    :return: the lines joined, each of them ended with 'os.linesep'.
    :raises IndexError: if a non blank line has no '|' marker. """

    margin = ' ' * indent
    # split only at the first marker, otherwise a '|' which belongs to the
    # content would cut the rest of the line off
    return ''.join(margin + line.split('|', 1)[1] + os.linesep
                   for line in text.splitlines() if line.strip())
469
470
def comment(name, opts=None):
    """ Utility function to format meta information as comment.

    :param name: the text the comment starts with.
    :param opts: optional dictionary, its items are written after the
                 name as 'key="value"' attributes. The values are written
                 as they are, escaping is up to the caller.
    :return: the HTML comment, ended with 'os.linesep'. """

    # 'None' is the default instead of a dictionary shared between calls
    options = opts if opts is not None else {}
    attributes = ''.join(' {0}="{1}"'.format(key, value)
                         for key, value in options.items())

    return '<!-- {0}{1} -->{2}'.format(name, attributes, os.linesep)
479
480
def commonprefix_from(filename):
    """ Create file prefix from a compilation database entries.

    :param filename: name of the JSON compilation database; the 'file'
                     value of each of its entries is used.
    :return: the result of 'commonprefix' on those file names. """

    with open(filename, 'r') as handle:
        entries = json.load(handle)
        return commonprefix(entry['file'] for entry in entries)
486
487
def commonprefix(files):
    """ Fixed version of os.path.commonprefix.

    :param files: list of file names.
    :return: the longest path prefix that is a prefix of all files. """

    names = list(files)
    if not names:
        return ''

    # the prefix is computed character by character, therefore it might
    # end in the middle of a path element
    shared = os.path.commonprefix(names)
    if os.path.isdir(shared):
        return os.path.abspath(shared)
    return os.path.dirname(shared)
506