1# -*- coding: utf-8 -*-
2# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3# See https://llvm.org/LICENSE.txt for license information.
4# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5""" This module implements the 'scan-build' command API.
6
Running the static analyzer against a build is done in multiple steps:
8
9 -- Intercept: capture the compilation command during the build,
10 -- Analyze:   run the analyzer against the captured commands,
11 -- Report:    create a cover report from the analyzer outputs.  """
12
13import re
14import os
15import os.path
16import json
17import logging
18import multiprocessing
19import tempfile
20import functools
21import subprocess
22import contextlib
23import datetime
24import shutil
25import glob
26from collections import defaultdict
27
28from libscanbuild import command_entry_point, compiler_wrapper, \
29    wrapper_environment, run_build, run_command, CtuConfig
30from libscanbuild.arguments import parse_args_for_scan_build, \
31    parse_args_for_analyze_build
32from libscanbuild.intercept import capture
33from libscanbuild.report import document
34from libscanbuild.compilation import split_command, classify_source, \
35    compiler_language
36from libscanbuild.clang import get_version, get_arguments, get_triple_arch, \
37    ClangErrorException
38from libscanbuild.shell import decode
39
__all__ = ['scan_build', 'analyze_build', 'analyze_compiler_wrapper']

# Names of the compiler wrapper executables that interpose CC/CXX during a
# wrapped build (exported to the build via setup_environment).
COMPILER_WRAPPER_CC = 'analyze-cc'
COMPILER_WRAPPER_CXX = 'analyze-c++'

# File name of the merged (global) external definition map used by CTU.
CTU_EXTDEF_MAP_FILENAME = 'externalDefMap.txt'
# Directory name holding the per-TU definition maps before they are merged.
CTU_TEMP_DEFMAP_FOLDER = 'tmpExternalDefMaps'
47
48
@command_entry_point
def scan_build():
    """ Entry point for scan-build command. """

    args = parse_args_for_scan_build()
    # A fresh report directory is created and becomes the new output.
    with report_directory(
            args.output, args.keep_empty, args.output_format) as args.output:
        # Some builds (like 'configure') do not need an analyzer run at
        # all, but the wrapper environment must still be set up so the
        # build system can capture the CC/CXX values for the Makefile.
        if args.intercept_first:
            # First capture the compilation commands with the intercept
            # module...
            exit_code = capture(args)
            # ...then replay them through the analyzer when needed.
            if need_analyzer(args.build):
                govern_analyzer_runs(args)
        else:
            # Build and analyze in a single pass via compiler wrappers.
            exit_code = run_build(args.build, env=setup_environment(args))
        # Generate the cover report and count the found bugs.
        number_of_bugs = document(args)
        # Exit status is the bug count only when the user requested it.
        return number_of_bugs if args.status_bugs else exit_code
75
76
@command_entry_point
def analyze_build():
    """ Entry point for analyze-build command. """

    args = parse_args_for_analyze_build()
    # A fresh report directory is created and becomes the new output.
    with report_directory(args.output, args.keep_empty,
                          args.output_format) as args.output:
        # Run the analyzer over every entry of the compilation database.
        govern_analyzer_runs(args)
        # Generate the cover report and count the found bugs.
        number_of_bugs = document(args)
        # Exit status is the bug count only when the user requested it.
        if args.status_bugs:
            return number_of_bugs
        return 0
90
91
def need_analyzer(args):
    """ Check the intent of the build command.

    When the static analyzer is run against a project's 'configure' step it
    should stay silent: there is no need to run the analyzer or generate a
    report. (Running `scan-build` against the configure step can still be
    necessary when compiler wrappers are used; that is the moment the build
    setup checks the compiler and captures its location.)

    :param args: the build command as a list of strings.
    :return: True when the analyzer should run against this command. """

    # 'len(args) and ...' leaked the int 0 for an empty command; make the
    # function return a proper bool in every case.
    return bool(args) and not re.search(r'configure|autogen', args[0])
103
104
def prefix_with(constant, pieces):
    """ Interleave a constant before every element of a sequence.

    eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """

    result = []
    for piece in pieces:
        result.append(constant)
        result.append(piece)
    return result
112
113
def get_ctu_config_from_args(args):
    """ CTU configuration is created from the chosen phases and dir. """

    # CTU is configured only when the argument parser registered the CTU
    # related attributes; otherwise return a disabled configuration.
    if hasattr(args, 'ctu_phases') and hasattr(args.ctu_phases, 'dir'):
        return CtuConfig(collect=args.ctu_phases.collect,
                         analyze=args.ctu_phases.analyze,
                         dir=args.ctu_dir,
                         extdef_map_cmd=args.extdef_map_cmd)
    return CtuConfig(collect=False, analyze=False, dir='', extdef_map_cmd='')
124
125
def get_ctu_config_from_json(ctu_conf_json):
    """ CTU configuration is created from the chosen phases and dir. """

    # The wrapper (analyze-cc / analyze-c++) passed the namedtuple through
    # the environment as a JSON array; rebuild it from the positional
    # fields here.
    values = json.loads(ctu_conf_json)
    return CtuConfig(collect=values[0],
                     analyze=values[1],
                     dir=values[2],
                     extdef_map_cmd=values[3])
135
136
def create_global_ctu_extdef_map(extdef_map_lines):
    """ Takes iterator of individual external definition maps and creates a
    global map keeping only unique names. We leave conflicting names out of
    CTU.

    :param extdef_map_lines: Contains the id of a definition (mangled name) and
    the originating source (the corresponding AST file) name.
    :type extdef_map_lines: Iterator of str.
    :returns: Mangled name - AST file pairs.
    :rtype: List of (str, str) tuples.
    """

    # Group every AST file by the mangled name it defines.
    mangled_to_asts = defaultdict(set)
    for line in extdef_map_lines:
        mangled_name, ast_file = line.strip().split(' ', 1)
        mangled_to_asts[mangled_name].add(ast_file)

    # Keep only unambiguous names: exactly one defining AST file.
    return [(mangled_name, next(iter(ast_files)))
            for mangled_name, ast_files in mangled_to_asts.items()
            if len(ast_files) == 1]
162
163
def merge_ctu_extdef_maps(ctudir):
    """ Merge individual external definition maps into a global one.

    As the collect phase runs parallel on multiple threads, all compilation
    units are separately mapped into a temporary file in CTU_TEMP_DEFMAP_FOLDER.
    These definition maps contain the mangled names and the source
    (AST generated from the source) which had their definition.
    These files should be merged at the end into a global map file:
    CTU_EXTDEF_MAP_FILENAME."""

    def generate_extdef_map_lines(extdefmap_dir):
        """ Iterate over all lines of input files in a determined order. """

        for filename in sorted(glob.glob(os.path.join(extdefmap_dir, '*'))):
            with open(filename, 'r') as in_file:
                for line in in_file:
                    yield line

    def write_global_map(arch, mangled_ast_pairs):
        """ Write (mangled name, ast file) pairs into final file. """

        extern_defs_map_file = os.path.join(ctudir, arch,
                                            CTU_EXTDEF_MAP_FILENAME)
        with open(extern_defs_map_file, 'w') as out_file:
            out_file.writelines('%s %s\n' % pair
                                for pair in mangled_ast_pairs)

    # Every triple arch has its own subdirectory under the CTU dir; merge
    # the maps of each one separately.
    for triple_path in glob.glob(os.path.join(ctudir, '*')):
        if not os.path.isdir(triple_path):
            continue
        triple_arch = os.path.basename(triple_path)
        extdefmap_dir = os.path.join(ctudir, triple_arch,
                                     CTU_TEMP_DEFMAP_FOLDER)

        mangled_ast_pairs = create_global_ctu_extdef_map(
            generate_extdef_map_lines(extdefmap_dir))
        write_global_map(triple_arch, mangled_ast_pairs)

        # The per-TU maps are no longer needed once merged.
        shutil.rmtree(extdefmap_dir, ignore_errors=True)
206
207
def run_analyzer_parallel(args):
    """ Runs the analyzer against the given compilation database.

    Entries whose source file falls under one of 'args.excludes' are
    skipped; the remaining entries are analyzed on a process pool (or
    sequentially when verbose output was requested). Error output from
    the analyzer is relayed into the log. """

    def exclude(filename, directory):
        """ Return true when any excluded directory prefix the filename. """
        if not os.path.isabs(filename):
            # filename is either absolute or relative to directory. Need to turn
            # it to absolute since 'args.excludes' are absolute paths.
            filename = os.path.normpath(os.path.join(directory, filename))
        return any(re.match(r'^' + exclude_directory, filename)
                   for exclude_directory in args.excludes)

    # Parameters that are identical for every analyzer invocation; merged
    # into each compilation database entry below.
    consts = {
        'clang': args.clang,
        'output_dir': args.output,
        'output_format': args.output_format,
        'output_failures': args.output_failures,
        'direct_args': analyzer_params(args),
        'force_debug': args.force_debug,
        'ctu': get_ctu_config_from_args(args)
    }

    logging.debug('run analyzer against compilation database')
    with open(args.cdb, 'r') as handle:
        # Lazy generator: entries are built (and filtered) in this process
        # while the pool consumes them; the cdb file stays open meanwhile.
        generator = (dict(cmd, **consts)
                     for cmd in json.load(handle) if not exclude(
                            cmd['file'], cmd['directory']))
        # when verbose output requested execute sequentially
        pool = multiprocessing.Pool(1 if args.verbose > 2 else None)
        for current in pool.imap_unordered(run, generator):
            if current is not None:
                # display error message from the static analyzer
                for line in current['error_output']:
                    logging.info(line.rstrip())
        pool.close()
        pool.join()
244
245
def govern_analyzer_runs(args):
    """ Governs multiple runs in CTU mode or runs once in normal mode. """

    ctu_config = get_ctu_config_from_args(args)
    # A CTU collect (1st phase) always starts from a clean slate, so any
    # previously collected data is removed first.
    if ctu_config.collect:
        shutil.rmtree(ctu_config.dir, ignore_errors=True)

    if not (ctu_config.collect and ctu_config.analyze):
        # A single phase (collect only, analyze only, or plain analysis)
        # is launched from here. Collected data is left on disk so several
        # analyze runs can reuse one collection run.
        run_analyzer_parallel(args)
        if ctu_config.collect:
            merge_ctu_extdef_maps(ctu_config.dir)
        return

    # The user asked for both phases: do an all-in-one run where the
    # collection data is deliberately removed before and after the run.
    # The dir and extdef_map_cmd strings keep coming from args.ctu_dir and
    # args.extdef_map_cmd, so they can stay empty here.
    args.ctu_phases = CtuConfig(collect=True, analyze=False,
                                dir='', extdef_map_cmd='')
    run_analyzer_parallel(args)
    merge_ctu_extdef_maps(ctu_config.dir)
    args.ctu_phases = CtuConfig(collect=False, analyze=True,
                                dir='', extdef_map_cmd='')
    run_analyzer_parallel(args)
    shutil.rmtree(ctu_config.dir, ignore_errors=True)
276
277
def setup_environment(args):
    """ Set up environment for build command to interpose compiler wrapper. """

    # Start from the current environment plus the generic wrapper settings,
    # then add the analyze specific values the wrappers read back.
    environment = dict(os.environ)
    environment.update(wrapper_environment(args))
    environment['CC'] = COMPILER_WRAPPER_CC
    environment['CXX'] = COMPILER_WRAPPER_CXX
    # An empty ANALYZE_BUILD_CLANG tells the wrapper to skip the analyzer.
    environment['ANALYZE_BUILD_CLANG'] = \
        args.clang if need_analyzer(args.build) else ''
    environment['ANALYZE_BUILD_REPORT_DIR'] = args.output
    environment['ANALYZE_BUILD_REPORT_FORMAT'] = args.output_format
    environment['ANALYZE_BUILD_REPORT_FAILURES'] = \
        'yes' if args.output_failures else ''
    environment['ANALYZE_BUILD_PARAMETERS'] = ' '.join(analyzer_params(args))
    environment['ANALYZE_BUILD_FORCE_DEBUG'] = \
        'yes' if args.force_debug else ''
    environment['ANALYZE_BUILD_CTU'] = \
        json.dumps(get_ctu_config_from_args(args))
    return environment
295
296
@command_entry_point
def analyze_compiler_wrapper():
    """ Entry point for `analyze-cc` and `analyze-c++` compiler wrappers. """

    # Delegate to the shared wrapper driver, which replays the original
    # compiler invocation and then calls our implementation.
    handler = analyze_compiler_wrapper_impl
    return compiler_wrapper(handler)
302
303
def analyze_compiler_wrapper_impl(result, execution):
    """ Implements analyzer compiler wrapper functionality. """

    # Skip the analysis when the compilation failed, or when the analyzer
    # run was not requested at all (empty ANALYZE_BUILD_CLANG).
    clang = os.getenv('ANALYZE_BUILD_CLANG')
    if result or not clang:
        return

    # Only compiler invocations that actually compile are interesting.
    compilation = split_command(execution.cmd)
    if compilation is None:
        return
    # Collect the needed parameters from the environment; a missing value
    # is a programming error and crashes deliberately.
    parameters = {
        'clang': clang,
        'output_dir': os.getenv('ANALYZE_BUILD_REPORT_DIR'),
        'output_format': os.getenv('ANALYZE_BUILD_REPORT_FORMAT'),
        'output_failures': os.getenv('ANALYZE_BUILD_REPORT_FAILURES'),
        'direct_args': os.getenv('ANALYZE_BUILD_PARAMETERS',
                                 '').split(' '),
        'force_debug': os.getenv('ANALYZE_BUILD_FORCE_DEBUG'),
        'directory': execution.cwd,
        'command': [execution.cmd[0], '-c'] + compilation.flags,
        'ctu': get_ctu_config_from_json(os.getenv('ANALYZE_BUILD_CTU'))
    }
    # Run the static analyzer once per source file of the compilation and
    # relay any analyzer error output into the log.
    for source in compilation.files:
        parameters['file'] = source
        logging.debug('analyzer parameters %s', parameters)
        current = run(parameters)
        if current is not None:
            for line in current['error_output']:
                logging.info(line.rstrip())
337
338
@contextlib.contextmanager
def report_directory(hint, keep, output_format):
    """ Responsible for the report directory.

    hint -- could specify the parent directory of the output directory.
    keep -- a boolean value to keep or delete the empty report directory.

    Yields the freshly created, time-stamped report directory. On exit a
    non empty directory is always kept (and the user is told how to view
    the results); an empty one is removed unless 'keep' was requested. """

    stamp_format = 'scan-build-%Y-%m-%d-%H-%M-%S-%f-'
    stamp = datetime.datetime.now().strftime(stamp_format)
    parent_dir = os.path.abspath(hint)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    name = tempfile.mkdtemp(prefix=stamp, dir=parent_dir)

    logging.info('Report directory created: %s', name)

    try:
        yield name
    finally:
        if os.listdir(name):
            if output_format not in ['sarif', 'sarif-html']: # FIXME:
                # 'scan-view' currently does not support sarif format.
                logging.warning(
                    "Run 'scan-view %s' to examine bug reports.", name)
            elif output_format == 'sarif-html':
                # BUG FIX: this message has two '%s' placeholders; the
                # original passed a single argument to logging.warning,
                # which broke the log record formatting.
                logging.warning(
                    "Run 'scan-view %s' to examine bug reports or see "
                    "merged sarif results at %s/results-merged.sarif.",
                    name, name)
            else:
                logging.warning(
                    "View merged sarif results at %s/results-merged.sarif.",
                    name)
            keep = True
        else:
            if keep:
                logging.warning(
                    "Report directory '%s' contains no report, but kept.",
                    name)
            else:
                logging.warning(
                    "Removing directory '%s' because it contains no report.",
                    name)

        if not keep:
            os.rmdir(name)
377
378
def analyzer_params(args):
    """ A group of command line arguments can mapped to command
    line arguments of the analyzer. This method generates those. """

    # Gather the plain analyzer options first...
    options = []

    if args.store_model:
        options.append('-analyzer-store={0}'.format(args.store_model))
    if args.constraints_model:
        options.append(
            '-analyzer-constraints={0}'.format(args.constraints_model))
    if args.internal_stats:
        options.append('-analyzer-stats')
    if args.analyze_headers:
        options.append('-analyzer-opt-analyze-headers')
    if args.stats:
        options.append('-analyzer-checker=debug.Stats')
    if args.maxloop:
        options += ['-analyzer-max-loop', str(args.maxloop)]
    if args.output_format:
        options.append('-analyzer-output={0}'.format(args.output_format))
    if args.analyzer_config:
        options += ['-analyzer-config', args.analyzer_config]
    if args.verbose >= 4:
        options.append('-analyzer-display-progress')
    if args.plugins:
        # every plugin file gets its own '-load' switch
        options += prefix_with('-load', args.plugins)
    if args.enable_checker:
        options += ['-analyzer-checker', ','.join(args.enable_checker)]
    if args.disable_checker:
        options += ['-analyzer-disable-checker',
                    ','.join(args.disable_checker)]

    # ...then prefix each of them with '-Xclang' so the compiler driver
    # forwards them to the front end.
    return prefix_with('-Xclang', options)
414
415
def require(required):
    """ Decorator for checking the required values in state.

    It checks the required attributes in the passed state and stop when
    any of those is missing. """

    def decorator(function):
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            state = args[0]
            # Complain about the first missing key, in declaration order.
            missing = [key for key in required if key not in state]
            if missing:
                raise KeyError('{0} not passed to {1}'.format(
                    missing[0], function.__name__))
            return function(*args, **kwargs)

        return wrapper

    return decorator
435
436
@require(['command',  # entry from compilation database
          'directory',  # entry from compilation database
          'file',  # entry from compilation database
          'clang',  # clang executable name (and path)
          'direct_args',  # arguments from command line
          'force_debug',  # kill non debug macros
          'output_dir',  # where generated report files shall go
          'output_format',  # it's 'plist', 'html', 'plist-html', 'plist-multi-file', 'sarif', or 'sarif-html'
          'output_failures',  # generate crash reports or not
          'ctu'])  # ctu control options
def run(opts):
    """ Entry point to run (or not) static analyzer against a single entry
    of the compilation database.

    This complex task is decomposed into smaller methods which are calling
    each other in chain. If the analysis is not possible the given method
    just return and break the chain.

    The passed parameter is a python dictionary. Each method first check
    that the needed parameters received. (This is done by the 'require'
    decorator. It's like an 'assert' to check the contract between the
    caller and the called method.) """

    try:
        raw_command = opts.pop('command')
        # The command may arrive as a shell string; split it when needed.
        if not isinstance(raw_command, list):
            raw_command = decode(raw_command)
        logging.debug("Run analyzer against '%s'", raw_command)
        opts.update(classify_parameters(raw_command))
        # Hand over to the first link of the method chain.
        return arch_check(opts)
    except Exception:
        logging.error("Problem occurred during analysis.", exc_info=1)
        return None
470
471
@require(['clang', 'directory', 'flags', 'file', 'output_dir', 'language',
          'error_output', 'exit_code'])
def report_failure(opts):
    """ Create report when analyzer failed.

    The major report is the preprocessor output. The output filename generated
    randomly. The compiler output also captured into '.stderr.txt' file.
    And some more execution context also saved into '.info.txt' file. """

    def extension():
        """ Generate preprocessor file extension. """

        mapping = {'objective-c++': '.mii', 'objective-c': '.mi', 'c++': '.ii'}
        return mapping.get(opts['language'], '.i')

    def destination():
        """ Creates failures directory if not exits yet. """

        failures_dir = os.path.join(opts['output_dir'], 'failures')
        if not os.path.isdir(failures_dir):
            os.makedirs(failures_dir)
        return failures_dir

    # Classify error type: when Clang terminated by a signal it's a 'Crash'.
    # (python subprocess Popen.returncode is negative when child terminated
    # by signal.) Everything else is 'Other Error'.
    error = 'crash' if opts['exit_code'] < 0 else 'other_error'
    # Create preprocessor output file name. (This is blindly following the
    # Perl implementation.)
    (handle, name) = tempfile.mkstemp(suffix=extension(),
                                      prefix='clang_' + error + '_',
                                      dir=destination())
    os.close(handle)
    # Execute Clang again, but run the syntax check only.
    cwd = opts['directory']
    cmd = [opts['clang'], '-fsyntax-only', '-E'] + opts['flags'] + \
        [opts['file'], '-o', name]
    try:
        cmd = get_arguments(cmd, cwd)
        run_command(cmd, cwd=cwd)
    except (subprocess.CalledProcessError, ClangErrorException):
        # The syntax-only re-run is best effort; the report is still
        # written from whatever context we have.
        pass
    # write general information about the crash (the 'with' statement
    # closes the file; the redundant explicit close() calls were removed)
    with open(name + '.info.txt', 'w') as handle:
        handle.write(opts['file'] + os.linesep)
        handle.write(error.title().replace('_', ' ') + os.linesep)
        handle.write(' '.join(cmd) + os.linesep)
        handle.write(' '.join(os.uname()) + os.linesep)
        handle.write(get_version(opts['clang']))
    # write the captured output too
    with open(name + '.stderr.txt', 'w') as handle:
        handle.writelines(opts['error_output'])
528
529
@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'output_dir',
          'output_format'])
def run_analyzer(opts, continuation=report_failure):
    """ It assembles the analysis command line and executes it. Capture the
    output of the analysis and returns with it. If failure reports are
    requested, it calls the continuation to generate it. """

    def target():
        """ Creates output file name for reports. """
        output_format = opts['output_format']
        if output_format in {'plist', 'plist-html', 'plist-multi-file'}:
            (handle, name) = tempfile.mkstemp(prefix='report-',
                                              suffix='.plist',
                                              dir=opts['output_dir'])
            os.close(handle)
            return name
        if output_format in {'sarif', 'sarif-html'}:
            (handle, name) = tempfile.mkstemp(prefix='result-',
                                              suffix='.sarif',
                                              dir=opts['output_dir'])
            os.close(handle)
            return name
        # html output is written straight into the report directory
        return opts['output_dir']

    def failure(result):
        """ Optionally generate a failure report, return the result. """
        if opts.get('output_failures', False):
            opts.update(result)
            continuation(opts)
        return result

    try:
        cwd = opts['directory']
        cmd = get_arguments([opts['clang'], '--analyze'] +
                            opts['direct_args'] + opts['flags'] +
                            [opts['file'], '-o', target()],
                            cwd)
        output = run_command(cmd, cwd=cwd)
        return {'error_output': output, 'exit_code': 0}
    except subprocess.CalledProcessError as ex:
        return failure({'error_output': ex.output,
                        'exit_code': ex.returncode})
    except ClangErrorException as ex:
        return failure({'error_output': ex.error, 'exit_code': 0})
578
579
def extdef_map_list_src_to_ast(extdef_src_list):
    """ Turns textual external definition map list with source files into an
    external definition map list with ast files. """

    extdef_ast_list = []
    for extdef_src_txt in extdef_src_list:
        mangled_name, path = extdef_src_txt.split(" ", 1)
        # Drop the drive letter (a no-op on POSIX) to normalize Windows
        # paths as well.
        path = os.path.splitdrive(path)[1]
        # An absolute path is made relative so it can be re-rooted below.
        if path[0] == os.sep:
            path = path[1:]
        ast_path = os.path.join("ast", path + ".ast")
        extdef_ast_list.append("{0} {1}".format(mangled_name, ast_path))
    return extdef_ast_list
594
595
@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'ctu'])
def ctu_collect_phase(opts):
    """ Preprocess source by generating all data needed by CTU analysis.

    Two artifacts are produced per source file: a serialized AST (stored
    under '<ctu-dir>/<arch>/ast/...') and an external definition map
    fragment (stored under CTU_TEMP_DEFMAP_FOLDER, merged later by
    merge_ctu_extdef_maps). """

    def generate_ast(triple_arch):
        """ Generates ASTs for the current compilation command. """

        args = opts['direct_args'] + opts['flags']
        # [1:] drops the leading path separator so the absolute source
        # path can be re-rooted under the 'ast' directory.
        # NOTE(review): on Windows this drops a drive-letter character
        # instead — TODO confirm intended behavior there.
        ast_joined_path = os.path.join(opts['ctu'].dir, triple_arch, 'ast',
                                       os.path.realpath(opts['file'])[1:] +
                                       '.ast')
        ast_path = os.path.abspath(ast_joined_path)
        ast_dir = os.path.dirname(ast_path)
        if not os.path.isdir(ast_dir):
            try:
                os.makedirs(ast_dir)
            except OSError:
                # In case an other process already created it.
                pass
        # '-w' suppresses all warnings; they were already shown (or will
        # be) by the regular analysis run.
        ast_command = [opts['clang'], '-emit-ast']
        ast_command.extend(args)
        ast_command.append('-w')
        ast_command.append(opts['file'])
        ast_command.append('-o')
        ast_command.append(ast_path)
        logging.debug("Generating AST using '%s'", ast_command)
        run_command(ast_command, cwd=opts['directory'])

    def map_extdefs(triple_arch):
        """ Generate external definition map file for the current source. """

        args = opts['direct_args'] + opts['flags']
        extdefmap_command = [opts['ctu'].extdef_map_cmd]
        extdefmap_command.append(opts['file'])
        extdefmap_command.append('--')
        extdefmap_command.extend(args)
        logging.debug("Generating external definition map using '%s'",
                      extdefmap_command)
        extdef_src_list = run_command(extdefmap_command, cwd=opts['directory'])
        extdef_ast_list = extdef_map_list_src_to_ast(extdef_src_list)
        extern_defs_map_folder = os.path.join(opts['ctu'].dir, triple_arch,
                                             CTU_TEMP_DEFMAP_FOLDER)
        if not os.path.isdir(extern_defs_map_folder):
            try:
                os.makedirs(extern_defs_map_folder)
            except OSError:
                # In case an other process already created it.
                pass
        if extdef_ast_list:
            # delete=False: the fragment must survive for the later merge;
            # the random file name avoids clashes between parallel workers.
            with tempfile.NamedTemporaryFile(mode='w',
                                             dir=extern_defs_map_folder,
                                             delete=False) as out_file:
                out_file.write("\n".join(extdef_ast_list) + "\n")

    # The triple arch decides which per-arch subdirectory the artifacts
    # are collected into.
    cwd = opts['directory']
    cmd = [opts['clang'], '--analyze'] + opts['direct_args'] + opts['flags'] \
        + [opts['file']]
    triple_arch = get_triple_arch(cmd, cwd)
    generate_ast(triple_arch)
    map_extdefs(triple_arch)
656
657
@require(['ctu'])
def dispatch_ctu(opts, continuation=run_analyzer):
    """ Execute only one phase of 2 phases of CTU if needed. """

    ctu_config = opts['ctu']

    # Nothing CTU related to do: fall through to the plain analysis.
    if not (ctu_config.collect or ctu_config.analyze):
        return continuation(opts)

    # Only one phase may run at a time; govern_analyzer_runs splits a
    # combined request into two separate invocations.
    assert ctu_config.collect != ctu_config.analyze

    if ctu_config.collect:
        return ctu_collect_phase(opts)

    # Analyze phase: extend the analyzer arguments with the CTU options.
    cwd = opts['directory']
    cmd = [opts['clang'], '--analyze'] + opts['direct_args'] \
        + opts['flags'] + [opts['file']]
    triarch = get_triple_arch(cmd, cwd)
    ctu_options = ['ctu-dir=' + os.path.join(ctu_config.dir, triarch),
                   'experimental-enable-naive-ctu-analysis=true']
    analyzer_options = prefix_with('-analyzer-config', ctu_options)
    opts['direct_args'].extend(prefix_with('-Xanalyzer', analyzer_options))
    return continuation(opts)
680
681
@require(['flags', 'force_debug'])
def filter_debug_flags(opts, continuation=dispatch_ctu):
    """ Filter out nondebug macros when requested. """

    force_debug = opts.pop('force_debug')
    if force_debug:
        # Lazy implementation: just undefine NDEBUG at the end of the
        # command line instead of filtering existing defines.
        opts['flags'] = opts['flags'] + ['-UNDEBUG']

    return continuation(opts)
691
692
@require(['language', 'compiler', 'file', 'flags'])
def language_check(opts, continuation=filter_debug_flags):
    """ Find out the language from command line parameters or file name
    extension. The decision also influenced by the compiler invocation. """

    accepted = frozenset({
        'c', 'c++', 'objective-c', 'objective-c++', 'c-cpp-output',
        'c++-cpp-output', 'objective-c-cpp-output'
    })

    # language can be given as a parameter...
    language = opts.pop('language')
    compiler = opts.pop('compiler')
    # ... or find out from source file extension
    if language is None and compiler is not None:
        language = classify_source(opts['file'], compiler == 'c')

    # Guard clauses: stop the chain when the language is unusable.
    if language is None:
        logging.debug('skip analysis, language not known')
        return None
    if language not in accepted:
        logging.debug('skip analysis, language not supported')
        return None

    logging.debug('analysis, language: %s', language)
    opts['language'] = language
    opts['flags'] = ['-x', language] + opts['flags']
    return continuation(opts)
721
722
@require(['arch_list', 'flags'])
def arch_check(opts, continuation=language_check):
    """ Do run analyzer through one of the given architectures. """

    disabled = frozenset({'ppc', 'ppc64'})

    received_list = opts.pop('arch_list')
    if not received_list:
        logging.debug('analysis, on default arch')
        return continuation(opts)

    # filter out disabled architectures
    filtered_list = [arch for arch in received_list if arch not in disabled]
    if not filtered_list:
        logging.debug('skip analysis, found not supported arch')
        return None

    # There should be only one arch given (or the same multiple
    # times). If there are multiple arch are given and are not
    # the same, those should not change the pre-processing step.
    # But that's the only pass we have before run the analyzer.
    current = filtered_list.pop()
    logging.debug('analysis, on arch: %s', current)
    opts['flags'] = ['-arch', current] + opts['flags']
    return continuation(opts)
749
750
# To have good results from static analyzer certain compiler options shall be
# omitted. The compiler flag filtering only affects the static analyzer run.
#
# Keys are the option name, value number of options to skip
# (the number of following arguments that belong to the flag and are
# dropped together with it in classify_parameters).
IGNORED_FLAGS = {
    '-c': 0,  # compile option will be overwritten
    '-fsyntax-only': 0,  # static analyzer option will be overwritten
    '-o': 1,  # will set up own output file
    # flags below are inherited from the perl implementation.
    '-g': 0,
    '-save-temps': 0,
    '-install_name': 1,
    '-exported_symbols_list': 1,
    '-current_version': 1,
    '-compatibility_version': 1,
    '-init': 1,
    '-e': 1,
    '-seg1addr': 1,
    '-bundle_loader': 1,
    '-multiply_defined': 1,
    '-sectorder': 3,
    '--param': 1,
    '--serialize-diagnostics': 1
}
775
776
def classify_parameters(command):
    """ Prepare compiler flags (filters some and add others) and take out
    language (-x) and architecture (-arch) flags for future processing. """

    result = {
        'flags': [],  # the filtered compiler flags
        'arch_list': [],  # list of architecture flags
        'language': None,  # compilation language, None, if not specified
        'compiler': compiler_language(command)  # 'c' or 'c++'
    }

    # iterate on the compile options (the first element is the compiler)
    args = iter(command[1:])
    for arg in args:
        if arg == '-arch':
            # take arch flags into a separate basket
            result['arch_list'].append(next(args))
        elif arg == '-x':
            # take the language
            result['language'] = next(args)
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            # parameters which look like source files are not flags
            pass
        elif arg in IGNORED_FLAGS:
            # drop the ignored flag together with its arguments
            for _ in range(IGNORED_FLAGS[arg]):
                next(args)
        elif re.match(r'^-W.+', arg) and not re.match(r'^-Wno-.+', arg):
            # extra warnings are not wanted, but keep the suppressions
            pass
        else:
            # everything else is considered a compilation flag
            result['flags'].append(arg)

    return result
814