1# -*- coding: utf-8 -*-
2# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3# See https://llvm.org/LICENSE.txt for license information.
4# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5""" This module implements the 'scan-build' command API.
6
Running the static analyzer against a build is done in multiple steps:
8
9 -- Intercept: capture the compilation command during the build,
10 -- Analyze:   run the analyzer against the captured commands,
11 -- Report:    create a cover report from the analyzer outputs.  """
12
13import re
14import os
15import os.path
16import json
17import logging
18import multiprocessing
19import tempfile
20import functools
21import subprocess
22import contextlib
23import datetime
24import shutil
25import glob
26from collections import defaultdict
27
28from libscanbuild import command_entry_point, compiler_wrapper, \
29    wrapper_environment, run_build, run_command, CtuConfig
30from libscanbuild.arguments import parse_args_for_scan_build, \
31    parse_args_for_analyze_build
32from libscanbuild.intercept import capture
33from libscanbuild.report import document
34from libscanbuild.compilation import split_command, classify_source, \
35    compiler_language
36from libscanbuild.clang import get_version, get_arguments, get_triple_arch
37from libscanbuild.shell import decode
38
39__all__ = ['scan_build', 'analyze_build', 'analyze_compiler_wrapper']
40
# Names of the compiler wrapper executables that get interposed as CC/CXX
# during a wrapped build (see setup_environment and analyze_compiler_wrapper).
COMPILER_WRAPPER_CC = 'analyze-cc'
COMPILER_WRAPPER_CXX = 'analyze-c++'

# File name of the merged (global) external definition map used by CTU
# analysis, and name of the folder holding the per-TU maps before merging.
CTU_EXTDEF_MAP_FILENAME = 'externalDefMap.txt'
CTU_TEMP_DEFMAP_FOLDER = 'tmpExternalDefMaps'
46
47
@command_entry_point
def scan_build():
    """ Entry point for scan-build command. """

    args = parse_args_for_scan_build()
    # The report directory is created up front and re-bound as the output
    # value for the rest of the run.
    with report_directory(args.output, args.keep_empty) as args.output:
        # A build has to be executed in every case (even a 'configure'
        # step needs to see the wrapper CC/CXX values), but the analyzer
        # itself runs only when the build command warrants it.
        if args.intercept_first:
            # Capture compilations with the intercept module, then replay
            # them through the analyzer.
            exit_code = capture(args)
            if need_analyzer(args.build):
                govern_analyzer_runs(args)
        else:
            # Let the compiler wrappers do the interposing and analyzing.
            exit_code = run_build(args.build, env=setup_environment(args))
        # Generate the cover report and count the bugs found.
        number_of_bugs = document(args)
        # Honor the requested exit status policy.
        return number_of_bugs if args.status_bugs else exit_code
73
74
@command_entry_point
def analyze_build():
    """ Entry point for analyze-build command. """

    args = parse_args_for_analyze_build()
    # The report directory is created up front and re-bound as the output
    # value for the rest of the run.
    with report_directory(args.output, args.keep_empty) as args.output:
        # Replay the compilation database through the analyzer, then
        # generate the cover report and count the bugs found.
        govern_analyzer_runs(args)
        number_of_bugs = document(args)
        # Honor the requested exit status policy.
        return number_of_bugs if args.status_bugs else 0
88
89
def need_analyzer(args):
    """ Check the intent of the build command.

    When static analyzer run against project configure step, it should be
    silent and no need to run the analyzer or generate report.

    To run `scan-build` against the configure step might be necessary,
    when compiler wrappers are used. That's the moment when build setup
    check the compiler and capture the location for the build process.

    :param args: the build command as a list of strings.
    :returns: True when the analyzer should run against this build. """

    # Return a proper bool: the previous expression leaked the int 0 to
    # callers when the command list was empty.
    return bool(args) and not re.search(r'configure|autogen', args[0])
101
102
def prefix_with(constant, pieces):
    """ Interleave a constant before every element of a sequence.

    eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """

    result = []
    for piece in pieces:
        result.append(constant)
        result.append(piece)
    return result
110
111
def get_ctu_config_from_args(args):
    """ CTU configuration is created from the chosen phases and dir. """

    # Only a CTU-aware argument parser provides the 'ctu_phases' attribute
    # (carrying a 'dir' field); otherwise return a disabled configuration.
    if hasattr(args, 'ctu_phases') and hasattr(args.ctu_phases, 'dir'):
        return CtuConfig(collect=args.ctu_phases.collect,
                         analyze=args.ctu_phases.analyze,
                         dir=args.ctu_dir,
                         extdef_map_cmd=args.extdef_map_cmd)
    return CtuConfig(collect=False, analyze=False, dir='', extdef_map_cmd='')
122
123
def get_ctu_config_from_json(ctu_conf_json):
    """ CTU configuration is created from the chosen phases and dir. """

    # The wrappers (analyze-cc / analyze-c++) pass the namedtuple through
    # the environment serialized as a JSON array; rebuild it field by field.
    collect, analyze, directory, extdef_map_cmd = json.loads(ctu_conf_json)
    return CtuConfig(collect=collect,
                     analyze=analyze,
                     dir=directory,
                     extdef_map_cmd=extdef_map_cmd)
133
134
def create_global_ctu_extdef_map(extdef_map_lines):
    """ Takes iterator of individual external definition maps and creates a
    global map keeping only unique names. We leave conflicting names out of
    CTU.

    :param extdef_map_lines: Contains the id of a definition (mangled name) and
    the originating source (the corresponding AST file) name.
    :type extdef_map_lines: Iterator of str.
    :returns: Mangled name - AST file pairs.
    :rtype: List of (str, str) tuples.
    """

    asts_by_name = defaultdict(set)
    for entry in extdef_map_lines:
        name, ast_file = entry.strip().split(' ', 1)
        asts_by_name[name].add(ast_file)

    # A name mapped to more than one AST file is ambiguous -- drop it.
    return [(name, next(iter(files)))
            for name, files in asts_by_name.items()
            if len(files) == 1]
160
161
def merge_ctu_extdef_maps(ctudir):
    """ Merge individual external definition maps into a global one.

    As the collect phase runs parallel on multiple threads, all compilation
    units are separately mapped into a temporary file in CTU_TEMP_DEFMAP_FOLDER.
    These definition maps contain the mangled names and the source
    (AST generated from the source) which had their definition.
    These files should be merged at the end into a global map file:
    CTU_EXTDEF_MAP_FILENAME."""

    def read_map_lines(directory):
        """ Yield every line of every map file, in a stable (sorted) order. """

        for map_file in sorted(glob.glob(os.path.join(directory, '*'))):
            with open(map_file, 'r') as handle:
                for line in handle:
                    yield line

    def write_merged_map(arch, pairs):
        """ Write (mangled name, ast file) pairs into the final map file. """

        target = os.path.join(ctudir, arch, CTU_EXTDEF_MAP_FILENAME)
        with open(target, 'w') as handle:
            for name, ast_file in pairs:
                handle.write('%s %s\n' % (name, ast_file))

    # One sub-directory per target architecture was created by the collect
    # phase; merge each of them independently.
    for candidate in glob.glob(os.path.join(ctudir, '*')):
        if not os.path.isdir(candidate):
            continue
        arch = os.path.basename(candidate)
        temp_dir = os.path.join(ctudir, arch, CTU_TEMP_DEFMAP_FOLDER)
        write_merged_map(arch, create_global_ctu_extdef_map(
            read_map_lines(temp_dir)))
        # The per-TU maps are no longer needed once merged.
        shutil.rmtree(temp_dir, ignore_errors=True)
204
205
def run_analyzer_parallel(args):
    """ Runs the analyzer against the given compilation database.

    Entries whose source file falls under an excluded directory are
    skipped.  Each remaining entry is merged with the analyzer settings
    derived from the command line arguments and handed to 'run' on a
    worker process of a multiprocessing pool. """

    def exclude(filename):
        """ Return true when any excluded directory prefix the filename. """
        # NOTE(review): the directory is interpolated into the pattern
        # unescaped, so regex metacharacters in an exclude path would be
        # interpreted -- presumably exclude paths are plain strings.
        return any(re.match(r'^' + directory, filename)
                   for directory in args.excludes)

    # Settings shared by every compilation database entry.
    consts = {
        'clang': args.clang,
        'output_dir': args.output,
        'output_format': args.output_format,
        'output_failures': args.output_failures,
        'direct_args': analyzer_params(args),
        'force_debug': args.force_debug,
        'ctu': get_ctu_config_from_args(args)
    }

    logging.debug('run analyzer against compilation database')
    with open(args.cdb, 'r') as handle:
        # Lazily merge the shared settings into each entry.
        generator = (dict(cmd, **consts)
                     for cmd in json.load(handle) if not exclude(cmd['file']))
        # when verbose output requested execute sequentially
        pool = multiprocessing.Pool(1 if args.verbose > 2 else None)
        for current in pool.imap_unordered(run, generator):
            if current is not None:
                # display error message from the static analyzer
                for line in current['error_output']:
                    logging.info(line.rstrip())
        pool.close()
        pool.join()
237
238
def govern_analyzer_runs(args):
    """ Governs multiple runs in CTU mode or runs once in normal mode.

    :param args: parsed command line arguments (carries the CTU phase
    selection when CTU mode was requested). """

    ctu_config = get_ctu_config_from_args(args)
    # If we do a CTU collect (1st phase) we remove all previous collection
    # data first.
    if ctu_config.collect:
        shutil.rmtree(ctu_config.dir, ignore_errors=True)

    # If the user asked for a collect (1st) and analyze (2nd) phase, we do an
    # all-in-one run where we deliberately remove collection data before and
    # also after the run. If the user asks only for a single phase data is
    # left so multiple analyze runs can use the same data gathered by a single
    # collection run.
    if ctu_config.collect and ctu_config.analyze:
        # CTU strings are coming from args.ctu_dir and extdef_map_cmd,
        # so we can leave it empty
        args.ctu_phases = CtuConfig(collect=True, analyze=False,
                                    dir='', extdef_map_cmd='')
        run_analyzer_parallel(args)
        # The per-TU definition maps have to be merged before the analyze
        # phase reads them.
        merge_ctu_extdef_maps(ctu_config.dir)
        args.ctu_phases = CtuConfig(collect=False, analyze=True,
                                    dir='', extdef_map_cmd='')
        run_analyzer_parallel(args)
        shutil.rmtree(ctu_config.dir, ignore_errors=True)
    else:
        # Single runs (collect or analyze) are launched from here.
        run_analyzer_parallel(args)
        if ctu_config.collect:
            merge_ctu_extdef_maps(ctu_config.dir)
269
270
def setup_environment(args):
    """ Set up environment for build command to interpose compiler wrapper. """

    # Start from the current environment, add the wrapper bookkeeping
    # variables, then point CC/CXX at the wrapper executables and encode
    # every analyzer setting the wrappers will need.
    environment = dict(os.environ)
    environment.update(wrapper_environment(args))
    environment['CC'] = COMPILER_WRAPPER_CC
    environment['CXX'] = COMPILER_WRAPPER_CXX
    # An empty clang value tells the wrappers not to run the analyzer.
    environment['ANALYZE_BUILD_CLANG'] = \
        args.clang if need_analyzer(args.build) else ''
    environment['ANALYZE_BUILD_REPORT_DIR'] = args.output
    environment['ANALYZE_BUILD_REPORT_FORMAT'] = args.output_format
    environment['ANALYZE_BUILD_REPORT_FAILURES'] = \
        'yes' if args.output_failures else ''
    environment['ANALYZE_BUILD_PARAMETERS'] = ' '.join(analyzer_params(args))
    environment['ANALYZE_BUILD_FORCE_DEBUG'] = \
        'yes' if args.force_debug else ''
    environment['ANALYZE_BUILD_CTU'] = \
        json.dumps(get_ctu_config_from_args(args))
    return environment
288
289
@command_entry_point
def analyze_compiler_wrapper():
    """ Entry point for `analyze-cc` and `analyze-c++` compiler wrappers. """

    # The shared wrapper driver runs the real compiler, then hands the
    # outcome to our implementation callback.
    result = compiler_wrapper(analyze_compiler_wrapper_impl)
    return result
295
296
def analyze_compiler_wrapper_impl(result, execution):
    """ Implements analyzer compiler wrapper functionality.

    :param result: exit code of the real compiler invocation.
    :param execution: the intercepted compiler invocation (carries 'cmd'
    and 'cwd' attributes). """

    # don't run analyzer when compilation fails. or when it's not requested.
    if result or not os.getenv('ANALYZE_BUILD_CLANG'):
        return

    # check is it a compilation?
    compilation = split_command(execution.cmd)
    if compilation is None:
        return
    # collect the needed parameters from environment, crash when missing
    parameters = {
        'clang': os.getenv('ANALYZE_BUILD_CLANG'),
        'output_dir': os.getenv('ANALYZE_BUILD_REPORT_DIR'),
        'output_format': os.getenv('ANALYZE_BUILD_REPORT_FORMAT'),
        'output_failures': os.getenv('ANALYZE_BUILD_REPORT_FAILURES'),
        'direct_args': os.getenv('ANALYZE_BUILD_PARAMETERS',
                                 '').split(' '),
        'force_debug': os.getenv('ANALYZE_BUILD_FORCE_DEBUG'),
        'directory': execution.cwd,
        # NOTE(review): '-c' is prepended to the filtered flags here --
        # presumably split_command dropped it; confirm against its docs.
        'command': [execution.cmd[0], '-c'] + compilation.flags,
        'ctu': get_ctu_config_from_json(os.getenv('ANALYZE_BUILD_CTU'))
    }
    # call static analyzer against the compilation
    for source in compilation.files:
        parameters.update({'file': source})
        logging.debug('analyzer parameters %s', parameters)
        current = run(parameters)
        # display error message from the static analyzer
        if current is not None:
            for line in current['error_output']:
                logging.info(line.rstrip())
330
331
@contextlib.contextmanager
def report_directory(hint, keep):
    """ Responsible for the report directory.

    hint -- could specify the parent directory of the output directory.
    keep -- a boolean value to keep or delete the empty report directory. """

    # A time-stamped, unique directory under the hinted parent.
    stamp = datetime.datetime.now().strftime(
        'scan-build-%Y-%m-%d-%H-%M-%S-%f-')
    parent_dir = os.path.abspath(hint)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    name = tempfile.mkdtemp(prefix=stamp, dir=parent_dir)

    logging.info('Report directory created: %s', name)

    try:
        yield name
    finally:
        # A directory with reports in it is always kept; an empty one is
        # kept only on request.
        if os.listdir(name):
            keep = True
            msg = "Run 'scan-view %s' to examine bug reports."
        elif keep:
            msg = "Report directory '%s' contains no report, but kept."
        else:
            msg = "Removing directory '%s' because it contains no report."
        logging.warning(msg, name)

        if not keep:
            os.rmdir(name)
363
364
def analyzer_params(args):
    """ A group of command line arguments can mapped to command
    line arguments of the analyzer. This method generates those. """

    options = []

    if args.store_model:
        options.append('-analyzer-store={0}'.format(args.store_model))
    if args.constraints_model:
        options.append(
            '-analyzer-constraints={0}'.format(args.constraints_model))
    if args.internal_stats:
        options.append('-analyzer-stats')
    if args.analyze_headers:
        options.append('-analyzer-opt-analyze-headers')
    if args.stats:
        options.append('-analyzer-checker=debug.Stats')
    if args.maxloop:
        options += ['-analyzer-max-loop', str(args.maxloop)]
    if args.output_format:
        options.append('-analyzer-output={0}'.format(args.output_format))
    if args.analyzer_config:
        options += ['-analyzer-config', args.analyzer_config]
    if args.verbose >= 4:
        options.append('-analyzer-display-progress')
    if args.plugins:
        options += prefix_with('-load', args.plugins)
    if args.enable_checker:
        checkers = ','.join(args.enable_checker)
        options += ['-analyzer-checker', checkers]
    if args.disable_checker:
        checkers = ','.join(args.disable_checker)
        options += ['-analyzer-disable-checker', checkers]

    # Every analyzer option has to be tunneled through the clang driver.
    return prefix_with('-Xclang', options)
400
401
def require(required):
    """ Decorator for checking the required values in state.

    It checks the required attributes in the passed state and stop when
    any of those is missing. """

    def decorator(function):
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            # The state dictionary is always the first positional argument.
            state = args[0]
            missing = [key for key in required if key not in state]
            if missing:
                raise KeyError('{0} not passed to {1}'.format(
                    missing[0], function.__name__))

            return function(*args, **kwargs)

        return wrapper

    return decorator
421
422
@require(['command',  # entry from compilation database
          'directory',  # entry from compilation database
          'file',  # entry from compilation database
          'clang',  # clang executable name (and path)
          'direct_args',  # arguments from command line
          'force_debug',  # kill non debug macros
          'output_dir',  # where generated report files shall go
          'output_format',  # it's 'plist', 'html', both or plist-multi-file
          'output_failures',  # generate crash reports or not
          'ctu'])  # ctu control options
def run(opts):
    """ Entry point to run (or not) static analyzer against a single entry
    of the compilation database.

    This complex task is decomposed into smaller methods which are calling
    each other in chain. If the analyzis is not possible the given method
    just return and break the chain.

    The passed parameter is a python dictionary. Each method first check
    that the needed parameters received. (This is done by the 'require'
    decorator. It's like an 'assert' to check the contract between the
    caller and the called method.)

    :returns: the analyzer result dictionary, or None when the analysis
    was skipped or failed. """

    try:
        # 'command' may arrive as a single shell string or as an argument
        # list; normalize string commands to a list (decode handles the
        # shell quoting).
        command = opts.pop('command')
        command = command if isinstance(command, list) else decode(command)
        logging.debug("Run analyzer against '%s'", command)
        opts.update(classify_parameters(command))

        # First link of the method chain (arch -> language -> ... -> run).
        return arch_check(opts)
    except Exception:
        # This runs on a pool worker: never propagate, just log and skip.
        logging.error("Problem occurred during analyzis.", exc_info=1)
        return None
456
457
@require(['clang', 'directory', 'flags', 'file', 'output_dir', 'language',
          'error_output', 'exit_code'])
def report_failure(opts):
    """ Create report when analyzer failed.

    The major report is the preprocessor output. The output filename generated
    randomly. The compiler output also captured into '.stderr.txt' file.
    And some more execution context also saved into '.info.txt' file. """

    def extension():
        """ Generate preprocessor file extension. """

        mapping = {'objective-c++': '.mii', 'objective-c': '.mi', 'c++': '.ii'}
        return mapping.get(opts['language'], '.i')

    def destination():
        """ Creates failures directory if not exits yet. """

        failures_dir = os.path.join(opts['output_dir'], 'failures')
        if not os.path.isdir(failures_dir):
            os.makedirs(failures_dir)
        return failures_dir

    # Classify error type: when Clang terminated by a signal it's a 'Crash'.
    # (python subprocess Popen.returncode is negative when child terminated
    # by signal.) Everything else is 'Other Error'.
    error = 'crash' if opts['exit_code'] < 0 else 'other_error'
    # Create preprocessor output file name. (This is blindly following the
    # Perl implementation.)
    (handle, name) = tempfile.mkstemp(suffix=extension(),
                                      prefix='clang_' + error + '_',
                                      dir=destination())
    os.close(handle)
    # Execute Clang again, but run the syntax check only.
    cwd = opts['directory']
    cmd = get_arguments(
        [opts['clang'], '-fsyntax-only', '-E'
         ] + opts['flags'] + [opts['file'], '-o', name], cwd)
    run_command(cmd, cwd=cwd)
    # write general information about the crash
    # (the redundant handle.close() calls were removed: the 'with'
    # statement already closes the files on exit)
    with open(name + '.info.txt', 'w') as handle:
        handle.write(opts['file'] + os.linesep)
        handle.write(error.title().replace('_', ' ') + os.linesep)
        handle.write(' '.join(cmd) + os.linesep)
        handle.write(' '.join(os.uname()) + os.linesep)
        handle.write(get_version(opts['clang']))
    # write the captured output too
    with open(name + '.stderr.txt', 'w') as handle:
        handle.writelines(opts['error_output'])
509
510
@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'output_dir',
          'output_format'])
def run_analyzer(opts, continuation=report_failure):
    """ It assembles the analysis command line and executes it. Capture the
    output of the analysis and returns with it. If failure reports are
    requested, it calls the continuation to generate it.

    :returns: dictionary with 'error_output' (captured analyzer output)
    and 'exit_code' keys. """

    def target():
        """ Creates output file name for reports. """
        # Plist-based formats get a unique output file per translation
        # unit; otherwise output goes into the report directory itself.
        if opts['output_format'] in {
                'plist',
                'plist-html',
                'plist-multi-file'}:
            (handle, name) = tempfile.mkstemp(prefix='report-',
                                              suffix='.plist',
                                              dir=opts['output_dir'])
            os.close(handle)
            return name
        return opts['output_dir']

    try:
        cwd = opts['directory']
        cmd = get_arguments([opts['clang'], '--analyze'] +
                            opts['direct_args'] + opts['flags'] +
                            [opts['file'], '-o', target()],
                            cwd)
        output = run_command(cmd, cwd=cwd)
        return {'error_output': output, 'exit_code': 0}
    except subprocess.CalledProcessError as ex:
        # The analyzer failed: report the failure (via the continuation,
        # report_failure by default) when that was requested, and hand the
        # captured output back either way.
        result = {'error_output': ex.output, 'exit_code': ex.returncode}
        if opts.get('output_failures', False):
            opts.update(result)
            continuation(opts)
        return result
545
546
def extdef_map_list_src_to_ast(extdef_src_list):
    """ Turns textual external definition map list with source files into an
    external definition map list with ast files.

    :param extdef_src_list: lines of the form '<mangled name> <source path>'.
    :returns: lines of the form '<mangled name> ast/<source path>.ast'. """

    extdef_ast_list = []
    for extdef_src_txt in extdef_src_list:
        mangled_name, path = extdef_src_txt.split(" ", 1)
        # Normalize path on windows as well
        path = os.path.splitdrive(path)[1]
        # Make relative path out of absolute.  (startswith also guards the
        # empty-path case, which plain indexing would crash on.)
        if path.startswith(os.sep):
            path = path[1:]
        ast_path = os.path.join("ast", path + ".ast")
        extdef_ast_list.append(mangled_name + " " + ast_path)
    return extdef_ast_list
561
562
@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'ctu'])
def ctu_collect_phase(opts):
    """ Preprocess source by generating all data needed by CTU analysis.

    Two artifacts are produced per compilation: the serialized AST of the
    translation unit and its external definition map. """

    def generate_ast(triple_arch):
        """ Generates ASTs for the current compilation command. """

        args = opts['direct_args'] + opts['flags']
        # Mirror the source tree under <ctu-dir>/<arch>/ast/ (the leading
        # separator of the absolute source path is stripped by the [1:]).
        ast_joined_path = os.path.join(opts['ctu'].dir, triple_arch, 'ast',
                                       os.path.realpath(opts['file'])[1:] +
                                       '.ast')
        ast_path = os.path.abspath(ast_joined_path)
        ast_dir = os.path.dirname(ast_path)
        if not os.path.isdir(ast_dir):
            try:
                os.makedirs(ast_dir)
            except OSError:
                # In case an other process already created it.
                pass
        ast_command = [opts['clang'], '-emit-ast']
        ast_command.extend(args)
        # '-w' silences warnings; they get reported by the analyze phase.
        ast_command.append('-w')
        ast_command.append(opts['file'])
        ast_command.append('-o')
        ast_command.append(ast_path)
        logging.debug("Generating AST using '%s'", ast_command)
        run_command(ast_command, cwd=opts['directory'])

    def map_extdefs(triple_arch):
        """ Generate external definition map file for the current source. """

        args = opts['direct_args'] + opts['flags']
        extdefmap_command = [opts['ctu'].extdef_map_cmd]
        extdefmap_command.append(opts['file'])
        extdefmap_command.append('--')
        extdefmap_command.extend(args)
        logging.debug("Generating external definition map using '%s'",
                      extdefmap_command)
        extdef_src_list = run_command(extdefmap_command, cwd=opts['directory'])
        extdef_ast_list = extdef_map_list_src_to_ast(extdef_src_list)
        extern_defs_map_folder = os.path.join(opts['ctu'].dir, triple_arch,
                                             CTU_TEMP_DEFMAP_FOLDER)
        if not os.path.isdir(extern_defs_map_folder):
            try:
                os.makedirs(extern_defs_map_folder)
            except OSError:
                # In case an other process already created it.
                pass
        if extdef_ast_list:
            # Each process writes its own uniquely named file; these are
            # merged later by merge_ctu_extdef_maps.
            with tempfile.NamedTemporaryFile(mode='w',
                                             dir=extern_defs_map_folder,
                                             delete=False) as out_file:
                out_file.write("\n".join(extdef_ast_list) + "\n")

    cwd = opts['directory']
    cmd = [opts['clang'], '--analyze'] + opts['direct_args'] + opts['flags'] \
        + [opts['file']]
    # Collected data is stored per target architecture.
    triple_arch = get_triple_arch(cmd, cwd)
    generate_ast(triple_arch)
    map_extdefs(triple_arch)
623
624
@require(['ctu'])
def dispatch_ctu(opts, continuation=run_analyzer):
    """ Execute only one phase of 2 phases of CTU if needed. """

    ctu_config = opts['ctu']

    # Outside of CTU mode both flags are off and we go straight on.
    if ctu_config.collect or ctu_config.analyze:
        # An all-in-one request was split into two runs upstream, so at
        # this point exactly one phase may be active.
        assert ctu_config.collect != ctu_config.analyze
        if ctu_config.collect:
            return ctu_collect_phase(opts)
        # Analyze phase: turn on naive CTU analysis and point the analyzer
        # at the per-architecture directory of the collected data.
        cwd = opts['directory']
        cmd = [opts['clang'], '--analyze'] + opts['direct_args'] \
            + opts['flags'] + [opts['file']]
        triarch = get_triple_arch(cmd, cwd)
        ctu_options = ['ctu-dir=' + os.path.join(ctu_config.dir, triarch),
                       'experimental-enable-naive-ctu-analysis=true']
        analyzer_options = prefix_with('-analyzer-config', ctu_options)
        direct_options = prefix_with('-Xanalyzer', analyzer_options)
        opts['direct_args'].extend(direct_options)

    return continuation(opts)
647
648
@require(['flags', 'force_debug'])
def filter_debug_flags(opts, continuation=dispatch_ctu):
    """ Filter out nondebug macros when requested. """

    force_debug = opts.pop('force_debug')
    if force_debug:
        # lazy implementation just append an undefine macro at the end
        opts['flags'] = opts['flags'] + ['-UNDEBUG']

    return continuation(opts)
658
659
@require(['language', 'compiler', 'file', 'flags'])
def language_check(opts, continuation=filter_debug_flags):
    """ Find out the language from command line parameters or file name
    extension. The decision also influenced by the compiler invocation. """

    accepted = frozenset({
        'c', 'c++', 'objective-c', 'objective-c++', 'c-cpp-output',
        'c++-cpp-output', 'objective-c-cpp-output'
    })

    language = opts.pop('language')
    compiler = opts.pop('compiler')
    if language is None and compiler is not None:
        # No explicit -x flag was seen: derive the language from the file
        # name, biased by whether a C or C++ compiler was invoked.
        language = classify_source(opts['file'], compiler == 'c')

    # Guard clauses: bail out on unknown or unsupported languages.
    if language is None:
        logging.debug('skip analysis, language not known')
        return None
    if language not in accepted:
        logging.debug('skip analysis, language not supported')
        return None

    logging.debug('analysis, language: %s', language)
    opts.update({'language': language,
                 'flags': ['-x', language] + opts['flags']})
    return continuation(opts)
688
689
@require(['arch_list', 'flags'])
def arch_check(opts, continuation=language_check):
    """ Do run analyzer through one of the given architectures. """

    disabled = frozenset({'ppc', 'ppc64'})

    received_list = opts.pop('arch_list')
    if not received_list:
        logging.debug('analysis, on default arch')
        return continuation(opts)

    # filter out disabled architectures
    filtered_list = [arch for arch in received_list if arch not in disabled]
    if not filtered_list:
        logging.debug('skip analysis, found not supported arch')
        return None

    # There should be only one arch given (or the same multiple
    # times). If there are multiple arch are given and are not
    # the same, those should not change the pre-processing step.
    # But that's the only pass we have before run the analyzer.
    current = filtered_list.pop()
    logging.debug('analysis, on arch: %s', current)

    opts.update({'flags': ['-arch', current] + opts['flags']})
    return continuation(opts)
716
717
# To have good results from static analyzer certain compiler options shall be
# omitted. The compiler flag filtering only affects the static analyzer run.
#
# Keys are the option name, value number of options to skip
# (i.e. how many following arguments belong to the flag and must be
# dropped together with it).
IGNORED_FLAGS = {
    '-c': 0,  # compile option will be overwritten
    '-fsyntax-only': 0,  # static analyzer option will be overwritten
    '-o': 1,  # will set up own output file
    # flags below are inherited from the perl implementation.
    '-g': 0,
    '-save-temps': 0,
    '-install_name': 1,
    '-exported_symbols_list': 1,
    '-current_version': 1,
    '-compatibility_version': 1,
    '-init': 1,
    '-e': 1,
    '-seg1addr': 1,
    '-bundle_loader': 1,
    '-multiply_defined': 1,
    '-sectorder': 3,
    '--param': 1,
    '--serialize-diagnostics': 1
}
742
743
def classify_parameters(command):
    """ Prepare compiler flags (filters some and add others) and take out
    language (-x) and architecture (-arch) flags for future processing. """

    result = {
        'flags': [],  # the filtered compiler flags
        'arch_list': [],  # list of architecture flags
        'language': None,  # compilation language, None, if not specified
        'compiler': compiler_language(command)  # 'c' or 'c++'
    }

    # walk the compile options (skipping the compiler name itself)
    argument_stream = iter(command[1:])
    for argument in argument_stream:
        if argument == '-arch':
            # collect architectures into their own bucket
            result['arch_list'].append(next(argument_stream))
        elif argument == '-x':
            # remember the explicitly given language
            result['language'] = next(argument_stream)
        elif re.match(r'^[^-].+', argument) and classify_source(argument):
            # source file names are not compiler flags
            pass
        elif argument in IGNORED_FLAGS:
            # drop the flag together with the arguments that belong to it
            for _ in range(IGNORED_FLAGS[argument]):
                next(argument_stream)
        elif re.match(r'^-W.+', argument) and \
                not re.match(r'^-Wno-.+', argument):
            # extra warnings are irrelevant, but keep the suppressions
            pass
        else:
            # everything else counts as a plain compilation flag
            result['flags'].append(argument)

    return result
781