1"""Facilities to analyze entire programs, including imported modules.
2
3Parse and analyze the source files of a program in the correct order
4(based on file dependencies), and collect the results.
5
6This module only directs a build, which is performed in multiple passes per
7file.  The individual passes are implemented in separate modules.
8
9The function build() is the main interface to this module.
10"""
11# TODO: More consistent terminology, e.g. path/fnam, module/id, state/file
12
13import contextlib
14import errno
15import gc
16import json
17import os
18import re
19import stat
20import sys
21import time
22import types
23
24from typing import (AbstractSet, Any, Dict, Iterable, Iterator, List, Sequence,
25                    Mapping, NamedTuple, Optional, Set, Tuple, Union, Callable, TextIO)
26from typing_extensions import ClassVar, Final, TYPE_CHECKING
27from mypy_extensions import TypedDict
28
29from mypy.nodes import MypyFile, ImportBase, Import, ImportFrom, ImportAll, SymbolTable
30from mypy.semanal_pass1 import SemanticAnalyzerPreAnalysis
31from mypy.semanal import SemanticAnalyzer
32import mypy.semanal_main
33from mypy.checker import TypeChecker
34from mypy.indirection import TypeIndirectionVisitor
35from mypy.errors import Errors, CompileError, ErrorInfo, report_internal_error
36from mypy.util import (
37    DecodeError, decode_python_encoding, is_sub_path, get_mypy_comments, module_prefix,
38    read_py_file, hash_digest, is_typeshed_file, is_stub_package_file, get_top_two_prefixes
39)
40if TYPE_CHECKING:
41    from mypy.report import Reports  # Avoid unconditional slow import
42from mypy.fixup import fixup_module
43from mypy.modulefinder import (
44    BuildSource, compute_search_paths, FindModuleCache, SearchPaths, ModuleSearchResult,
45    ModuleNotFoundReason
46)
47from mypy.nodes import Expression
48from mypy.options import Options
49from mypy.parse import parse
50from mypy.stats import dump_type_stats
51from mypy.types import Type
52from mypy.version import __version__
53from mypy.plugin import Plugin, ChainedPlugin, ReportConfigContext
54from mypy.plugins.default import DefaultPlugin
55from mypy.fscache import FileSystemCache
56from mypy.metastore import MetadataStore, FilesystemMetadataStore, SqliteMetadataStore
57from mypy.typestate import TypeState, reset_global_state
58from mypy.renaming import VariableRenameVisitor
59from mypy.config_parser import parse_mypy_comments
60from mypy.freetree import free_tree
61from mypy.stubinfo import legacy_bundled_packages, is_legacy_bundled_package
62from mypy import errorcodes as codes
63
64
# Switch to True to produce debug output related to fine-grained incremental
# mode that is useful during development. This produces only a subset of the
# output that --verbose produces. We use a global flag so that it's easy to
# enable this when running tests.
69DEBUG_FINE_GRAINED = False  # type: Final
70
71# These modules are special and should always come from typeshed.
72CORE_BUILTIN_MODULES = {
73    'builtins',
74    'typing',
75    'types',
76    'typing_extensions',
77    'mypy_extensions',
78    '_importlib_modulespec',
79    'sys',
80    'abc',
81}
82
83
84Graph = Dict[str, 'State']
85
86
87# TODO: Get rid of BuildResult.  We might as well return a BuildManager.
88class BuildResult:
89    """The result of a successful build.
90
    Attributes:
      manager: The build manager.
      graph:   Dependency graph of module build states, indexed by module name.
      files:   Dictionary from module name to related AST node.
      types:   Dictionary from parse tree node to its inferred type.
      used_cache: Whether the build took advantage of a pre-existing cache.
      errors:  List of error messages.
97    """
98
99    def __init__(self, manager: 'BuildManager', graph: Graph) -> None:
100        self.manager = manager
101        self.graph = graph
102        self.files = manager.modules
103        self.types = manager.all_types  # Non-empty if export_types True in options
104        self.used_cache = manager.cache_enabled
105        self.errors = []  # type: List[str]  # Filled in by build if desired
106
107
108class BuildSourceSet:
109    """Efficiently test a file's membership in the set of build sources."""
110
111    def __init__(self, sources: List[BuildSource]) -> None:
112        self.source_text_present = False
113        self.source_modules = set()  # type: Set[str]
114        self.source_paths = set()  # type: Set[str]
115
116        for source in sources:
117            if source.text is not None:
118                self.source_text_present = True
119            elif source.path:
120                self.source_paths.add(source.path)
121            else:
122                self.source_modules.add(source.module)
123
124    def is_source(self, file: MypyFile) -> bool:
125        if file.path and file.path in self.source_paths:
126            return True
127        elif file._fullname in self.source_modules:
128            return True
129        elif self.source_text_present:
130            return True
131        else:
132            return False
133
134
135def build(sources: List[BuildSource],
136          options: Options,
137          alt_lib_path: Optional[str] = None,
138          flush_errors: Optional[Callable[[List[str], bool], None]] = None,
139          fscache: Optional[FileSystemCache] = None,
140          stdout: Optional[TextIO] = None,
141          stderr: Optional[TextIO] = None,
142          extra_plugins: Optional[Sequence[Plugin]] = None,
143          ) -> BuildResult:
144    """Analyze a program.
145
146    A single call to build performs parsing, semantic analysis and optionally
147    type checking for the program *and* all imported modules, recursively.
148
149    Return BuildResult if successful or only non-blocking errors were found;
150    otherwise raise CompileError.
151
152    If a flush_errors callback is provided, all error messages will be
153    passed to it and the errors and messages fields of BuildResult and
154    CompileError (respectively) will be empty. Otherwise those fields will
155    report any error messages.
156
157    Args:
158      sources: list of sources to build
159      options: build options
160      alt_lib_path: an additional directory for looking up library modules
161        (takes precedence over other directories)
162      flush_errors: optional function to flush errors after a file is processed
163      fscache: optionally a file-system cacher
164
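    A minimal usage sketch ("example.py" is a hypothetical source file; error
    handling for CompileError is elided):

        from mypy.build import build
        from mypy.modulefinder import BuildSource
        from mypy.options import Options

        result = build([BuildSource('example.py', None)], Options())
        print(result.errors)  # filled in by the default flush_errors callback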
165    """
166    # If we were not given a flush_errors, we use one that will populate those
167    # fields for callers that want the traditional API.
168    messages = []
169
170    def default_flush_errors(new_messages: List[str], is_serious: bool) -> None:
171        messages.extend(new_messages)
172
173    flush_errors = flush_errors or default_flush_errors
174    stdout = stdout or sys.stdout
175    stderr = stderr or sys.stderr
176    extra_plugins = extra_plugins or []
177
178    try:
179        result = _build(
180            sources, options, alt_lib_path, flush_errors, fscache, stdout, stderr, extra_plugins
181        )
182        result.errors = messages
183        return result
184    except CompileError as e:
185        # CompileErrors raised from an errors object carry all of the
186        # messages that have not been reported out by error streaming.
        # Patch it up to contain either none or all of the messages,
188        # depending on whether we are flushing errors.
189        serious = not e.use_stdout
190        flush_errors(e.messages, serious)
191        e.messages = messages
192        raise
193
194
195def _build(sources: List[BuildSource],
196           options: Options,
197           alt_lib_path: Optional[str],
198           flush_errors: Callable[[List[str], bool], None],
199           fscache: Optional[FileSystemCache],
200           stdout: TextIO,
201           stderr: TextIO,
202           extra_plugins: Sequence[Plugin],
203           ) -> BuildResult:
204    # This seems the most reasonable place to tune garbage collection.
205    gc.set_threshold(150 * 1000)
206
207    data_dir = default_data_dir()
208    fscache = fscache or FileSystemCache()
209
210    search_paths = compute_search_paths(sources, options, data_dir, alt_lib_path)
211
212    reports = None
213    if options.report_dirs:
214        # Import lazily to avoid slowing down startup.
215        from mypy.report import Reports  # noqa
216        reports = Reports(data_dir, options.report_dirs)
217
218    source_set = BuildSourceSet(sources)
219    cached_read = fscache.read
220    errors = Errors(options.show_error_context,
221                    options.show_column_numbers,
222                    options.show_error_codes,
223                    options.pretty,
224                    lambda path: read_py_file(path, cached_read, options.python_version),
225                    options.show_absolute_path,
226                    options.enabled_error_codes,
227                    options.disabled_error_codes,
228                    options.many_errors_threshold)
229    plugin, snapshot = load_plugins(options, errors, stdout, extra_plugins)
230
231    # Add catch-all .gitignore to cache dir if we created it
232    cache_dir_existed = os.path.isdir(options.cache_dir)
233
234    # Construct a build manager object to hold state during the build.
235    #
236    # Ignore current directory prefix in error messages.
237    manager = BuildManager(data_dir, search_paths,
238                           ignore_prefix=os.getcwd(),
239                           source_set=source_set,
240                           reports=reports,
241                           options=options,
242                           version_id=__version__,
243                           plugin=plugin,
244                           plugins_snapshot=snapshot,
245                           errors=errors,
246                           flush_errors=flush_errors,
247                           fscache=fscache,
248                           stdout=stdout,
249                           stderr=stderr)
250    manager.trace(repr(options))
251
252    reset_global_state()
253    try:
254        graph = dispatch(sources, manager, stdout)
255        if not options.fine_grained_incremental:
256            TypeState.reset_all_subtype_caches()
257        return BuildResult(manager, graph)
258    finally:
259        t0 = time.time()
260        manager.metastore.commit()
261        manager.add_stats(cache_commit_time=time.time() - t0)
262        manager.log("Build finished in %.3f seconds with %d modules, and %d errors" %
263                    (time.time() - manager.start_time,
264                     len(manager.modules),
265                     manager.errors.num_messages()))
266        manager.dump_stats()
267        if reports is not None:
268            # Finish the HTML or XML reports even if CompileError was raised.
269            reports.finish()
270        if not cache_dir_existed and os.path.isdir(options.cache_dir):
271            add_catch_all_gitignore(options.cache_dir)
272            exclude_from_backups(options.cache_dir)
273        if os.path.isdir(options.cache_dir):
274            record_missing_stub_packages(options.cache_dir, manager.missing_stub_packages)
275
276
277def default_data_dir() -> str:
    """Return the directory that contains the typeshed directory."""
279    return os.path.dirname(__file__)
280
281
282def normpath(path: str, options: Options) -> str:
283    """Convert path to absolute; but to relative in bazel mode.
284
285    (Bazel's distributed cache doesn't like filesystem metadata to
286    end up in output files.)
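
    For example (illustrative): "foo/bar.py" is returned unchanged under
    --bazel, and is expanded to an absolute path such as
    "/home/user/project/foo/bar.py" otherwise.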
287    """
288    # TODO: Could we always use relpath?  (A worry in non-bazel
289    # mode would be that a moved file may change its full module
290    # name without changing its size, mtime or hash.)
291    if options.bazel:
292        return os.path.relpath(path)
293    else:
294        return os.path.abspath(path)
295
296
297CacheMeta = NamedTuple('CacheMeta',
298                       [('id', str),
299                        ('path', str),
300                        ('mtime', int),
301                        ('size', int),
302                        ('hash', str),
303                        ('dependencies', List[str]),  # names of imported modules
304                        ('data_mtime', int),  # mtime of data_json
305                        ('data_json', str),  # path of <id>.data.json
306                        ('suppressed', List[str]),  # dependencies that weren't imported
307                        ('options', Optional[Dict[str, object]]),  # build options
308                        # dep_prios and dep_lines are in parallel with
309                        # dependencies + suppressed.
310                        ('dep_prios', List[int]),
311                        ('dep_lines', List[int]),
312                        ('interface_hash', str),  # hash representing the public interface
313                        ('version_id', str),  # mypy version for cache invalidation
314                        ('ignore_all', bool),  # if errors were ignored
315                        ('plugin_data', Any),  # config data from plugins
316                        ])
317# NOTE: dependencies + suppressed == all reachable imports;
318# suppressed contains those reachable imports that were prevented by
319# silent mode or simply not found.
320
321# Metadata for the fine-grained dependencies file associated with a module.
322FgDepMeta = TypedDict('FgDepMeta', {'path': str, 'mtime': int})
323
324
325def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta:
    """Build a CacheMeta object from a JSON metadata dictionary.
327
328    Args:
329      meta: JSON metadata read from the metadata cache file
330      data_json: Path to the .data.json file containing the AST trees
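
    A truncated, illustrative metadata dictionary might look like this:

        {"id": "foo.bar", "path": "foo/bar.py", "mtime": 1614182179,
         "size": 1234, "hash": "...", "dependencies": ["builtins", "foo"],
         "data_mtime": 1614182180, "suppressed": [], "options": {...},
         "dep_prios": [5, 10], "dep_lines": [1, 2], "interface_hash": "...",
         "version_id": "0.800", "ignore_all": false, "plugin_data": null}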
331    """
332    sentinel = None  # type: Any  # Values to be validated by the caller
333    return CacheMeta(
334        meta.get('id', sentinel),
335        meta.get('path', sentinel),
336        int(meta['mtime']) if 'mtime' in meta else sentinel,
337        meta.get('size', sentinel),
338        meta.get('hash', sentinel),
339        meta.get('dependencies', []),
340        int(meta['data_mtime']) if 'data_mtime' in meta else sentinel,
341        data_json,
342        meta.get('suppressed', []),
343        meta.get('options'),
344        meta.get('dep_prios', []),
345        meta.get('dep_lines', []),
346        meta.get('interface_hash', ''),
347        meta.get('version_id', sentinel),
348        meta.get('ignore_all', True),
349        meta.get('plugin_data', None),
350    )
351
352
353# Priorities used for imports.  (Here, top-level includes inside a class.)
354# These are used to determine a more predictable order in which the
355# nodes in an import cycle are processed.
356PRI_HIGH = 5  # type: Final  # top-level "from X import blah"
357PRI_MED = 10  # type: Final  # top-level "import X"
358PRI_LOW = 20  # type: Final  # either form inside a function
359PRI_MYPY = 25  # type: Final  # inside "if MYPY" or "if TYPE_CHECKING"
360PRI_INDIRECT = 30  # type: Final  # an indirect dependency
361PRI_ALL = 99  # type: Final  # include all priorities
362
363
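# For example (illustrative): given an import node `imp` from a parsed tree,
# a top-level "import foo" yields import_priority(imp, PRI_MED) == PRI_MED,
# the same import inside a function yields PRI_LOW (whatever the argument),
# and an import guarded by "if TYPE_CHECKING:" yields PRI_MYPY.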
364def import_priority(imp: ImportBase, toplevel_priority: int) -> int:
365    """Compute import priority from an import node."""
366    if not imp.is_top_level:
367        # Inside a function
368        return PRI_LOW
369    if imp.is_mypy_only:
370        # Inside "if MYPY" or "if typing.TYPE_CHECKING"
371        return max(PRI_MYPY, toplevel_priority)
372    # A regular import; priority determined by argument.
373    return toplevel_priority
374
375
376def load_plugins_from_config(
377    options: Options, errors: Errors, stdout: TextIO
378) -> Tuple[List[Plugin], Dict[str, str]]:
379    """Load all configured plugins.
380
381    Return a list of all the loaded plugins from the config file.
382    The second return value is a snapshot of versions/hashes of loaded user
383    plugins (for cache validation).
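
    For example (illustrative), plugins may be listed either as dotted module
    names or as paths, optionally followed by ":<function>" to override the
    default "plugin" entry point:

        [mypy]
        plugins = myplugins.custom, plugins/custom_plugin.py:plugin_entry

    Relative .py paths are resolved against the config file's directory.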
384    """
385    import importlib
386    snapshot = {}  # type: Dict[str, str]
387
388    if not options.config_file:
389        return [], snapshot
390
391    line = find_config_file_line_number(options.config_file, 'mypy', 'plugins')
392    if line == -1:
393        line = 1  # We need to pick some line number that doesn't look too confusing
394
395    def plugin_error(message: str) -> None:
396        errors.report(line, 0, message)
397        errors.raise_error(use_stdout=False)
398
399    custom_plugins = []  # type: List[Plugin]
400    errors.set_file(options.config_file, None)
401    for plugin_path in options.plugins:
402        func_name = 'plugin'
403        plugin_dir = None  # type: Optional[str]
404        if ':' in os.path.basename(plugin_path):
405            plugin_path, func_name = plugin_path.rsplit(':', 1)
406        if plugin_path.endswith('.py'):
407            # Plugin paths can be relative to the config file location.
408            plugin_path = os.path.join(os.path.dirname(options.config_file), plugin_path)
409            if not os.path.isfile(plugin_path):
410                plugin_error('Can\'t find plugin "{}"'.format(plugin_path))
411            # Use an absolute path to avoid populating the cache entry
412            # for 'tmp' during tests, since it will be different in
413            # different tests.
414            plugin_dir = os.path.abspath(os.path.dirname(plugin_path))
415            fnam = os.path.basename(plugin_path)
416            module_name = fnam[:-3]
417            sys.path.insert(0, plugin_dir)
418        elif re.search(r'[\\/]', plugin_path):
419            fnam = os.path.basename(plugin_path)
420            plugin_error('Plugin "{}" does not have a .py extension'.format(fnam))
421        else:
422            module_name = plugin_path
423
424        try:
425            module = importlib.import_module(module_name)
426        except Exception as exc:
427            plugin_error('Error importing plugin "{}": {}'.format(plugin_path, exc))
428        finally:
429            if plugin_dir is not None:
430                assert sys.path[0] == plugin_dir
431                del sys.path[0]
432
433        if not hasattr(module, func_name):
434            plugin_error('Plugin "{}" does not define entry point function "{}"'.format(
435                plugin_path, func_name))
436
437        try:
438            plugin_type = getattr(module, func_name)(__version__)
439        except Exception:
440            print('Error calling the plugin(version) entry point of {}\n'.format(plugin_path),
441                  file=stdout)
442            raise  # Propagate to display traceback
443
444        if not isinstance(plugin_type, type):
445            plugin_error(
446                'Type object expected as the return value of "plugin"; got {!r} (in {})'.format(
447                    plugin_type, plugin_path))
448        if not issubclass(plugin_type, Plugin):
449            plugin_error(
450                'Return value of "plugin" must be a subclass of "mypy.plugin.Plugin" '
451                '(in {})'.format(plugin_path))
452        try:
453            custom_plugins.append(plugin_type(options))
454            snapshot[module_name] = take_module_snapshot(module)
455        except Exception:
456            print('Error constructing plugin instance of {}\n'.format(plugin_type.__name__),
457                  file=stdout)
458            raise  # Propagate to display traceback
459
460    return custom_plugins, snapshot
461
462
463def load_plugins(options: Options,
464                 errors: Errors,
465                 stdout: TextIO,
466                 extra_plugins: Sequence[Plugin],
467                 ) -> Tuple[Plugin, Dict[str, str]]:
468    """Load all configured plugins.
469
470    Return a plugin that encapsulates all plugins chained together. Always
471    at least include the default plugin (it's last in the chain).
472    The second return value is a snapshot of versions/hashes of loaded user
473    plugins (for cache validation).
474    """
475    custom_plugins, snapshot = load_plugins_from_config(options, errors, stdout)
476
477    custom_plugins += extra_plugins
478
479    default_plugin = DefaultPlugin(options)  # type: Plugin
480    if not custom_plugins:
481        return default_plugin, snapshot
482
483    # Custom plugins take precedence over the default plugin.
484    return ChainedPlugin(options, custom_plugins + [default_plugin]), snapshot
485
486
487def take_module_snapshot(module: types.ModuleType) -> str:
488    """Take plugin module snapshot by recording its version and hash.
489
    We record _both_ the hash and the version to detect more possible changes
491    (e.g. if there is a change in modules imported by a plugin).
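
    The result looks like "<version>:<hash>", e.g. (illustrative)
    "0.1.0:7edcb2dbcd341f407f18ae1eb915d18d61e25d07".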
492    """
493    if hasattr(module, '__file__'):
494        with open(module.__file__, 'rb') as f:
495            digest = hash_digest(f.read())
496    else:
497        digest = 'unknown'
498    ver = getattr(module, '__version__', 'none')
499    return '{}:{}'.format(ver, digest)
500
501
502def find_config_file_line_number(path: str, section: str, setting_name: str) -> int:
503    """Return the approximate location of setting_name within mypy config file.
504
    Return -1 if the line can't be determined unambiguously.
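
    For example (illustrative), given a config file containing:

        [mypy]
        plugins = custom_plugin.py

    find_config_file_line_number(path, 'mypy', 'plugins') returns 2.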
506    """
507    in_desired_section = False
508    try:
509        results = []
510        with open(path) as f:
511            for i, line in enumerate(f):
512                line = line.strip()
513                if line.startswith('[') and line.endswith(']'):
514                    current_section = line[1:-1].strip()
515                    in_desired_section = (current_section == section)
516                elif in_desired_section and re.match(r'{}\s*='.format(setting_name), line):
517                    results.append(i + 1)
518        if len(results) == 1:
519            return results[0]
520    except OSError:
521        pass
522    return -1
523
524
525class BuildManager:
526    """This class holds shared state for building a mypy program.
527
528    It is used to coordinate parsing, import processing, semantic
529    analysis and type checking.  The actual build steps are carried
530    out by dispatch().
531
532    Attributes:
533      data_dir:        Mypy data directory (contains stubs)
534      search_paths:    SearchPaths instance indicating where to look for modules
535      modules:         Mapping of module ID to MypyFile (shared by the passes)
536      semantic_analyzer:
537                       Semantic analyzer, pass 2
538      semantic_analyzer_pass3:
539                       Semantic analyzer, pass 3
540      all_types:       Map {Expression: Type} from all modules (enabled by export_types)
541      options:         Build options
      missing_modules: Set of modules encountered so far that could not be imported
543      stale_modules:   Set of modules that needed to be rechecked (only used by tests)
544      fg_deps_meta:    Metadata for fine-grained dependencies caches associated with modules
545      fg_deps:         A fine-grained dependency map
546      version_id:      The current mypy version (based on commit id when possible)
547      plugin:          Active mypy plugin(s)
548      plugins_snapshot:
549                       Snapshot of currently active user plugins (versions and hashes)
550      old_plugins_snapshot:
                       Plugins snapshot from the previous incremental run (or None in
                       non-incremental mode or if the cache was not found)
553      errors:          Used for reporting all errors
554      flush_errors:    A function for processing errors after each SCC
555      cache_enabled:   Whether cache is being read. This is set based on options,
556                       but is disabled if fine-grained cache loading fails
557                       and after an initial fine-grained load. This doesn't
558                       determine whether we write cache files or not.
559      quickstart_state:
560                       A cache of filename -> mtime/size/hash info used to avoid
561                       needing to hash source files when using a cache with mismatching mtimes
      stats:           Dict with various instrumentation numbers. It is used
                       not only for debugging, but is also required for correctness,
                       in particular to check consistency of the fine-grained dependency cache.
565      fscache:         A file system cacher
566      ast_cache:       AST cache to speed up mypy daemon
567    """
568
569    def __init__(self, data_dir: str,
570                 search_paths: SearchPaths,
571                 ignore_prefix: str,
572                 source_set: BuildSourceSet,
573                 reports: 'Optional[Reports]',
574                 options: Options,
575                 version_id: str,
576                 plugin: Plugin,
577                 plugins_snapshot: Dict[str, str],
578                 errors: Errors,
579                 flush_errors: Callable[[List[str], bool], None],
580                 fscache: FileSystemCache,
581                 stdout: TextIO,
582                 stderr: TextIO,
583                 ) -> None:
584        self.stats = {}  # type: Dict[str, Any]  # Values are ints or floats
585        self.stdout = stdout
586        self.stderr = stderr
587        self.start_time = time.time()
588        self.data_dir = data_dir
589        self.errors = errors
590        self.errors.set_ignore_prefix(ignore_prefix)
591        self.search_paths = search_paths
592        self.source_set = source_set
593        self.reports = reports
594        self.options = options
595        self.version_id = version_id
596        self.modules = {}  # type: Dict[str, MypyFile]
597        self.missing_modules = set()  # type: Set[str]
598        self.fg_deps_meta = {}  # type: Dict[str, FgDepMeta]
599        # fg_deps holds the dependencies of every module that has been
600        # processed. We store this in BuildManager so that we can compute
601        # dependencies as we go, which allows us to free ASTs and type information,
602        # saving a ton of memory on net.
603        self.fg_deps = {}  # type: Dict[str, Set[str]]
604        # Always convert the plugin to a ChainedPlugin so that it can be manipulated if needed
605        if not isinstance(plugin, ChainedPlugin):
606            plugin = ChainedPlugin(options, [plugin])
607        self.plugin = plugin
608        # Set of namespaces (module or class) that are being populated during semantic
609        # analysis and may have missing definitions.
610        self.incomplete_namespaces = set()  # type: Set[str]
611        self.semantic_analyzer = SemanticAnalyzer(
612            self.modules,
613            self.missing_modules,
614            self.incomplete_namespaces,
615            self.errors,
616            self.plugin)
617        self.all_types = {}  # type: Dict[Expression, Type]  # Enabled by export_types
618        self.indirection_detector = TypeIndirectionVisitor()
619        self.stale_modules = set()  # type: Set[str]
620        self.rechecked_modules = set()  # type: Set[str]
621        self.flush_errors = flush_errors
622        has_reporters = reports is not None and reports.reporters
623        self.cache_enabled = (options.incremental
624                              and (not options.fine_grained_incremental
625                                   or options.use_fine_grained_cache)
626                              and not has_reporters)
627        self.fscache = fscache
628        self.find_module_cache = FindModuleCache(self.search_paths, self.fscache, self.options)
629        self.metastore = create_metastore(options)
630
631        # a mapping from source files to their corresponding shadow files
632        # for efficient lookup
633        self.shadow_map = {}  # type: Dict[str, str]
634        if self.options.shadow_file is not None:
635            self.shadow_map = {source_file: shadow_file
636                               for (source_file, shadow_file)
637                               in self.options.shadow_file}
638        # a mapping from each file being typechecked to its possible shadow file
639        self.shadow_equivalence_map = {}  # type: Dict[str, Optional[str]]
640        self.plugin = plugin
641        self.plugins_snapshot = plugins_snapshot
642        self.old_plugins_snapshot = read_plugins_snapshot(self)
643        self.quickstart_state = read_quickstart_file(options, self.stdout)
644        # Fine grained targets (module top levels and top level functions) processed by
645        # the semantic analyzer, used only for testing. Currently used only by the new
646        # semantic analyzer.
647        self.processed_targets = []  # type: List[str]
648        # Missing stub packages encountered.
649        self.missing_stub_packages = set()  # type: Set[str]
650        # Cache for mypy ASTs that have completed semantic analysis
651        # pass 1. When multiple files are added to the build in a
652        # single daemon increment, only one of the files gets added
653        # per step and the others are discarded. This gets repeated
654        # until all the files have been added. This means that a
655        # new file can be processed O(n**2) times. This cache
656        # avoids most of this redundant work.
657        self.ast_cache = {}  # type: Dict[str, Tuple[MypyFile, List[ErrorInfo]]]
658
659    def dump_stats(self) -> None:
660        if self.options.dump_build_stats:
661            print("Stats:")
662            for key, value in sorted(self.stats_summary().items()):
663                print("{:24}{}".format(key + ":", value))
664
665    def use_fine_grained_cache(self) -> bool:
666        return self.cache_enabled and self.options.use_fine_grained_cache
667
668    def maybe_swap_for_shadow_path(self, path: str) -> str:
669        if not self.shadow_map:
670            return path
671
672        path = normpath(path, self.options)
673
674        previously_checked = path in self.shadow_equivalence_map
675        if not previously_checked:
676            for source, shadow in self.shadow_map.items():
677                if self.fscache.samefile(path, source):
678                    self.shadow_equivalence_map[path] = shadow
679                    break
680                else:
681                    self.shadow_equivalence_map[path] = None
682
683        shadow_file = self.shadow_equivalence_map.get(path)
684        return shadow_file if shadow_file else path
685
686    def get_stat(self, path: str) -> os.stat_result:
687        return self.fscache.stat(self.maybe_swap_for_shadow_path(path))
688
689    def getmtime(self, path: str) -> int:
690        """Return a file's mtime; but 0 in bazel mode.
691
692        (Bazel's distributed cache doesn't like filesystem metadata to
693        end up in output files.)
694        """
695        if self.options.bazel:
696            return 0
697        else:
698            return int(self.metastore.getmtime(path))
699
700    def all_imported_modules_in_file(self,
701                                     file: MypyFile) -> List[Tuple[int, str, int]]:
702        """Find all reachable import statements in a file.
703
704        Return list of tuples (priority, module id, import line number)
705        for all modules imported in file; lower numbers == higher priority.
706
707        Can generate blocking errors on bogus relative imports.
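
        For example (illustrative): a top-level "import foo.bar.baz" on line 3
        produces roughly

            [(PRI_MED, 'foo.bar.baz', 3), (PRI_LOW, 'foo', 3), (PRI_LOW, 'foo.bar', 3)]

        i.e. the target module followed by its ancestor packages at a lower
        priority.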
708        """
709
710        def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
711            """Function to correct for relative imports."""
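            # For example (illustrative): inside module "pkg.sub.mod",
            # "from .. import x" resolves to "pkg", while "from ..x import y"
            # resolves to "pkg.x".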
712            file_id = file.fullname
713            rel = imp.relative
714            if rel == 0:
715                return imp.id
716            if os.path.basename(file.path).startswith('__init__.'):
717                rel -= 1
718            if rel != 0:
719                file_id = ".".join(file_id.split(".")[:-rel])
720            new_id = file_id + "." + imp.id if imp.id else file_id
721
722            if not new_id:
723                self.errors.set_file(file.path, file.name)
724                self.errors.report(imp.line, 0,
725                                   "No parent module -- cannot perform relative import",
726                                   blocker=True)
727
728            return new_id
729
730        res = []  # type: List[Tuple[int, str, int]]
731        for imp in file.imports:
732            if not imp.is_unreachable:
733                if isinstance(imp, Import):
734                    pri = import_priority(imp, PRI_MED)
735                    ancestor_pri = import_priority(imp, PRI_LOW)
736                    for id, _ in imp.ids:
737                        # We append the target (e.g. foo.bar.baz)
738                        # before the ancestors (e.g. foo and foo.bar)
739                        # so that, if FindModuleCache finds the target
740                        # module in a package marked with py.typed
741                        # underneath a namespace package installed in
742                        # site-packages, (gasp), that cache's
743                        # knowledge of the ancestors can be primed
744                        # when it is asked to find the target.
745                        res.append((pri, id, imp.line))
746                        ancestor_parts = id.split(".")[:-1]
747                        ancestors = []
748                        for part in ancestor_parts:
749                            ancestors.append(part)
750                            res.append((ancestor_pri, ".".join(ancestors), imp.line))
751                elif isinstance(imp, ImportFrom):
752                    cur_id = correct_rel_imp(imp)
753                    pos = len(res)
754                    all_are_submodules = True
755                    # Also add any imported names that are submodules.
756                    pri = import_priority(imp, PRI_MED)
757                    for name, __ in imp.names:
758                        sub_id = cur_id + '.' + name
759                        if self.is_module(sub_id):
760                            res.append((pri, sub_id, imp.line))
761                        else:
762                            all_are_submodules = False
                    # Add cur_id as a dependency, even if all of the
                    # imports are submodules. Processing "from ... import" will
                    # try to look through cur_id, so we should depend on it.
                    # As a workaround for some bugs in cycle handling (#4498),
                    # if all of the imports are submodules, do the import at a lower
                    # priority.
769                    pri = import_priority(imp, PRI_HIGH if not all_are_submodules else PRI_LOW)
                    res.insert(pos, (pri, cur_id, imp.line))
771                elif isinstance(imp, ImportAll):
772                    pri = import_priority(imp, PRI_HIGH)
773                    res.append((pri, correct_rel_imp(imp), imp.line))
774
775        return res
776
777    def is_module(self, id: str) -> bool:
778        """Is there a file in the file system corresponding to module id?"""
779        return find_module_simple(id, self) is not None
780
781    def parse_file(self, id: str, path: str, source: str, ignore_errors: bool,
782                   options: Options) -> MypyFile:
783        """Parse the source of a file with the given name.
784
785        Raise CompileError if there is a parse error.
786        """
787        t0 = time.time()
788        tree = parse(source, path, id, self.errors, options=options)
789        tree._fullname = id
790        self.add_stats(files_parsed=1,
791                       modules_parsed=int(not tree.is_stub),
792                       stubs_parsed=int(tree.is_stub),
793                       parse_time=time.time() - t0)
794
795        if self.errors.is_blockers():
796            self.log("Bailing due to parse errors")
797            self.errors.raise_error()
798
799        self.errors.set_file_ignored_lines(path, tree.ignored_lines, ignore_errors)
800        return tree
801
802    def load_fine_grained_deps(self, id: str) -> Dict[str, Set[str]]:
803        t0 = time.time()
804        if id in self.fg_deps_meta:
805            # TODO: Assert deps file wasn't changed.
806            deps = json.loads(self.metastore.read(self.fg_deps_meta[id]['path']))
807        else:
808            deps = {}
809        val = {k: set(v) for k, v in deps.items()}
810        self.add_stats(load_fg_deps_time=time.time() - t0)
811        return val
812
813    def report_file(self,
814                    file: MypyFile,
815                    type_map: Dict[Expression, Type],
816                    options: Options) -> None:
817        if self.reports is not None and self.source_set.is_source(file):
818            self.reports.file(file, self.modules, type_map, options)
819
820    def verbosity(self) -> int:
821        return self.options.verbosity
822
823    def log(self, *message: str) -> None:
824        if self.verbosity() >= 1:
825            if message:
826                print('LOG: ', *message, file=self.stderr)
827            else:
828                print(file=self.stderr)
829            self.stderr.flush()
830
831    def log_fine_grained(self, *message: str) -> None:
832        import mypy.build
833        if self.verbosity() >= 1:
834            self.log('fine-grained:', *message)
835        elif mypy.build.DEBUG_FINE_GRAINED:
836            # Output log in a simplified format that is quick to browse.
837            if message:
838                print(*message, file=self.stderr)
839            else:
840                print(file=self.stderr)
841            self.stderr.flush()
842
843    def trace(self, *message: str) -> None:
844        if self.verbosity() >= 2:
845            print('TRACE:', *message, file=self.stderr)
846            self.stderr.flush()
847
848    def add_stats(self, **kwds: Any) -> None:
849        for key, value in kwds.items():
850            if key in self.stats:
851                self.stats[key] += value
852            else:
853                self.stats[key] = value
854
855    def stats_summary(self) -> Mapping[str, object]:
856        return self.stats
857
858
859def deps_to_json(x: Dict[str, Set[str]]) -> str:
860    return json.dumps({k: list(v) for k, v in x.items()})
861
862
863# File for storing metadata about all the fine-grained dependency caches
864DEPS_META_FILE = '@deps.meta.json'  # type: Final
# File for storing fine-grained dependencies that didn't have a parent module in the build
866DEPS_ROOT_FILE = '@root.deps.json'  # type: Final
867
868# The name of the fake module used to store fine-grained dependencies that
869# have no other place to go.
870FAKE_ROOT_MODULE = '@root'  # type: Final
871
872
873def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]],
874                     manager: BuildManager, graph: Graph) -> None:
875    """Write cache files for fine-grained dependencies.
876
877    Serialize fine-grained dependencies map for fine grained mode.
878
    Dependencies on some module 'm' are stored in the dependency cache
880    file m.deps.json.  This entails some spooky action at a distance:
881    if module 'n' depends on 'm', that produces entries in m.deps.json.
882    When there is a dependency on a module that does not exist in the
883    build, it is stored with its first existing parent module. If no
884    such module exists, it is stored with the fake module FAKE_ROOT_MODULE.
885
886    This means that the validity of the fine-grained dependency caches
    is a global property, so we store validity checking information for
888    fine-grained dependencies in a global cache file:
889     * We take a snapshot of current sources to later check consistency
890       between the fine-grained dependency cache and module cache metadata
891     * We store the mtime of all of the dependency files to verify they
892       haven't changed
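
    For example (illustrative): if module 'n' contains a fine-grained
    dependency trigger on 'm', say '<m.f>' -> {'n.g'}, that entry ends up in
    m.deps.json roughly as:

        {"<m.f>": ["n.g"]}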
893    """
894    metastore = manager.metastore
895
896    error = False
897
898    fg_deps_meta = manager.fg_deps_meta.copy()
899
900    for id in rdeps:
901        if id != FAKE_ROOT_MODULE:
902            _, _, deps_json = get_cache_names(id, graph[id].xpath, manager.options)
903        else:
904            deps_json = DEPS_ROOT_FILE
905        assert deps_json
906        manager.log("Writing deps cache", deps_json)
907        if not manager.metastore.write(deps_json, deps_to_json(rdeps[id])):
908            manager.log("Error writing fine-grained deps JSON file {}".format(deps_json))
909            error = True
910        else:
911            fg_deps_meta[id] = {'path': deps_json, 'mtime': manager.getmtime(deps_json)}
912
913    meta_snapshot = {}  # type: Dict[str, str]
914    for id, st in graph.items():
915        # If we didn't parse a file (so it doesn't have a
916        # source_hash), then it must be a module with a fresh cache,
917        # so use the hash from that.
918        if st.source_hash:
919            hash = st.source_hash
920        else:
921            assert st.meta, "Module must be either parsed or cached"
922            hash = st.meta.hash
923        meta_snapshot[id] = hash
924
925    meta = {'snapshot': meta_snapshot, 'deps_meta': fg_deps_meta}
926
927    if not metastore.write(DEPS_META_FILE, json.dumps(meta)):
928        manager.log("Error writing fine-grained deps meta JSON file {}".format(DEPS_META_FILE))
929        error = True
930
931    if error:
932        manager.errors.set_file(_cache_dir_prefix(manager.options), None)
933        manager.errors.report(0, 0, "Error writing fine-grained dependencies cache",
934                              blocker=True)
935
936
937def invert_deps(deps: Dict[str, Set[str]],
938                graph: Graph) -> Dict[str, Dict[str, Set[str]]]:
939    """Splits fine-grained dependencies based on the module of the trigger.
940
941    Returns a dictionary from module ids to all dependencies on that
942    module. Dependencies not associated with a module in the build will be
943    associated with the nearest parent module that is in the build, or the
944    fake module FAKE_ROOT_MODULE if none are.
945    """
946    # Lazy import to speed up startup
947    from mypy.server.target import trigger_to_target
948
949    # Prepopulate the map for all the modules that have been processed,
950    # so that we always generate files for processed modules (even if
951    # there aren't any dependencies to them.)
952    rdeps = {id: {} for id, st in graph.items() if st.tree}  # type: Dict[str, Dict[str, Set[str]]]
953    for trigger, targets in deps.items():
954        module = module_prefix(graph, trigger_to_target(trigger))
955        if not module or not graph[module].tree:
956            module = FAKE_ROOT_MODULE
957
958        mod_rdeps = rdeps.setdefault(module, {})
959        mod_rdeps.setdefault(trigger, set()).update(targets)
960
961    return rdeps
962
963
964def generate_deps_for_cache(manager: BuildManager,
965                            graph: Graph) -> Dict[str, Dict[str, Set[str]]]:
966    """Generate fine-grained dependencies into a form suitable for serializing.
967
    This does a couple of things:
969    1. Splits fine-grained deps based on the module of the trigger
970    2. For each module we generated fine-grained deps for, load any previous
971       deps and merge them in.
972
973    Returns a dictionary from module ids to all dependencies on that
974    module. Dependencies not associated with a module in the build will be
975    associated with the nearest parent module that is in the build, or the
976    fake module FAKE_ROOT_MODULE if none are.
977    """
978    from mypy.server.deps import merge_dependencies  # Lazy import to speed up startup
979
    # Split the dependencies out based on the module that is depended on.
981    rdeps = invert_deps(manager.fg_deps, graph)
982
983    # We can't just clobber existing dependency information, so we
984    # load the deps for every module we've generated new dependencies
985    # to and merge the new deps into them.
986    for module, mdeps in rdeps.items():
987        old_deps = manager.load_fine_grained_deps(module)
988        merge_dependencies(old_deps, mdeps)
989
990    return rdeps
991
992
993PLUGIN_SNAPSHOT_FILE = '@plugins_snapshot.json'  # type: Final
994
995
996def write_plugins_snapshot(manager: BuildManager) -> None:
997    """Write snapshot of versions and hashes of currently active plugins."""
998    if not manager.metastore.write(PLUGIN_SNAPSHOT_FILE, json.dumps(manager.plugins_snapshot)):
999        manager.errors.set_file(_cache_dir_prefix(manager.options), None)
1000        manager.errors.report(0, 0, "Error writing plugins snapshot",
1001                              blocker=True)
1002
1003
1004def read_plugins_snapshot(manager: BuildManager) -> Optional[Dict[str, str]]:
1005    """Read cached snapshot of versions and hashes of plugins from previous run."""
1006    snapshot = _load_json_file(PLUGIN_SNAPSHOT_FILE, manager,
1007                               log_success='Plugins snapshot ',
1008                               log_error='Could not load plugins snapshot: ')
1009    if snapshot is None:
1010        return None
1011    if not isinstance(snapshot, dict):
1012        manager.log('Could not load plugins snapshot: cache is not a dict: {}'
1013                    .format(type(snapshot)))
1014        return None
1015    return snapshot
1016
1017
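# The quickstart file, if configured, is expected to map file paths to
# (mtime, size, hash) triples, e.g. (illustrative):
#
#     {"foo/bar.py": [1614182179.0, 1234, "d41d8cd98f00b204e9800998ecf8427e"]}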
1018def read_quickstart_file(options: Options,
1019                         stdout: TextIO,
1020                         ) -> Optional[Dict[str, Tuple[float, int, str]]]:
1021    quickstart = None  # type: Optional[Dict[str, Tuple[float, int, str]]]
1022    if options.quickstart_file:
1023        # This is very "best effort". If the file is missing or malformed,
1024        # just ignore it.
1025        raw_quickstart = {}  # type: Dict[str, Any]
1026        try:
1027            with open(options.quickstart_file, "r") as f:
1028                raw_quickstart = json.load(f)
1029
1030            quickstart = {}
1031            for file, (x, y, z) in raw_quickstart.items():
1032                quickstart[file] = (x, y, z)
1033        except Exception as e:
1034            print("Warning: Failed to load quickstart file: {}\n".format(str(e)), file=stdout)
1035    return quickstart
1036
1037
1038def read_deps_cache(manager: BuildManager,
1039                    graph: Graph) -> Optional[Dict[str, FgDepMeta]]:
1040    """Read and validate the fine-grained dependencies cache.
1041
1042    See the write_deps_cache documentation for more information on
1043    the details of the cache.
1044
1045    Returns None if the cache was invalid in some way.
1046    """
1047    deps_meta = _load_json_file(DEPS_META_FILE, manager,
1048                                log_success='Deps meta ',
1049                                log_error='Could not load fine-grained dependency metadata: ')
1050    if deps_meta is None:
1051        return None
1052    meta_snapshot = deps_meta['snapshot']
1053    # Take a snapshot of the source hashes from all of the metas we found.
1054    # (Including the ones we rejected because they were out of date.)
1055    # We use this to verify that they match up with the proto_deps.
1056    current_meta_snapshot = {id: st.meta_source_hash for id, st in graph.items()
1057                             if st.meta_source_hash is not None}
1058
1059    common = set(meta_snapshot.keys()) & set(current_meta_snapshot.keys())
1060    if any(meta_snapshot[id] != current_meta_snapshot[id] for id in common):
1061        # TODO: invalidate also if options changed (like --strict-optional)?
1062        manager.log('Fine-grained dependencies cache inconsistent, ignoring')
1063        return None
1064
1065    module_deps_metas = deps_meta['deps_meta']
1066    if not manager.options.skip_cache_mtime_checks:
1067        for id, meta in module_deps_metas.items():
1068            try:
1069                matched = manager.getmtime(meta['path']) == meta['mtime']
1070            except FileNotFoundError:
1071                matched = False
1072            if not matched:
1073                manager.log('Invalid or missing fine-grained deps cache: {}'.format(meta['path']))
1074                return None
1075
1076    return module_deps_metas
1077
1078
1079def _load_json_file(file: str, manager: BuildManager,
1080                    log_success: str, log_error: str) -> Optional[Dict[str, Any]]:
1081    """A simple helper to read a JSON file with logging."""
1082    t0 = time.time()
1083    try:
1084        data = manager.metastore.read(file)
1085    except IOError:
1086        manager.log(log_error + file)
1087        return None
1088    manager.add_stats(metastore_read_time=time.time() - t0)
1089    # Only bother to compute the log message if we are logging it, since it could be big
1090    if manager.verbosity() >= 2:
1091        manager.trace(log_success + data.rstrip())
1092    try:
1093        result = json.loads(data)
1094    except ValueError:  # TODO: JSONDecodeError in 3.5
1095        manager.errors.set_file(file, None)
1096        manager.errors.report(-1, -1,
1097                              "Error reading JSON file;"
1098                              " you likely have a bad cache.\n"
1099                              "Try removing the {cache_dir} directory"
1100                              " and run mypy again.".format(
1101                                  cache_dir=manager.options.cache_dir
1102                              ),
1103                              blocker=True)
1104        return None
1105    else:
1106        return result
1107
1108
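# For example (illustrative): with cache_dir=".mypy_cache" and Python 3.8, the
# prefix is ".mypy_cache/3.8"; under --bazel the current directory is used so
# that relative cache-map paths keep working.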
1109def _cache_dir_prefix(options: Options) -> str:
    """Get the current cache directory prefix."""
1111    if options.bazel:
1112        # This is needed so the cache map works.
1113        return os.curdir
1114    cache_dir = options.cache_dir
1115    pyversion = options.python_version
1116    base = os.path.join(cache_dir, '%d.%d' % pyversion)
1117    return base
1118
1119
1120def add_catch_all_gitignore(target_dir: str) -> None:
1121    """Add catch-all .gitignore to an existing directory.
1122
1123    No-op if the .gitignore already exists.
1124    """
1125    gitignore = os.path.join(target_dir, ".gitignore")
1126    try:
1127        with open(gitignore, "x") as f:
1128            print("# Automatically created by mypy", file=f)
1129            print("*", file=f)
1130    except FileExistsError:
1131        pass
1132
1133
1134def exclude_from_backups(target_dir: str) -> None:
1135    """Exclude the directory from various archives and backups supporting CACHEDIR.TAG.
1136
    If the CACHEDIR.TAG file exists, the function is a no-op.
1138    """
1139    cachedir_tag = os.path.join(target_dir, "CACHEDIR.TAG")
1140    try:
1141        with open(cachedir_tag, "x") as f:
1142            f.write("""Signature: 8a477f597d28d172789f06886806bc55
1143# This file is a cache directory tag automatically created by mypy.
1144# For information about cache directory tags see https://bford.info/cachedir/
1145""")
1146    except FileExistsError:
1147        pass
1148
1149
1150def create_metastore(options: Options) -> MetadataStore:
1151    """Create the appropriate metadata store."""
1152    if options.sqlite_cache:
1153        mds = SqliteMetadataStore(_cache_dir_prefix(options))  # type: MetadataStore
1154    else:
1155        mds = FilesystemMetadataStore(_cache_dir_prefix(options))
1156    return mds
1157
1158
1159def get_cache_names(id: str, path: str, options: Options) -> Tuple[str, str, Optional[str]]:
1160    """Return the file names for the cache files.
1161
1162    Args:
1163      id: module ID
1164      path: module path
      options: build options (determine the cache directory and whether a
        fine-grained deps cache file is used)
1167
1168    Returns:
1169      A tuple with the file names to be used for the meta JSON, the
1170      data JSON, and the fine-grained deps JSON, respectively.
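
      For example (illustrative), module "foo.bar" at "foo/bar.py" maps to
      ("foo/bar.meta.json", "foo/bar.data.json", None), with the third entry
      becoming "foo/bar.deps.json" when --cache-fine-grained is in effect.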
1171    """
1172    if options.cache_map:
1173        pair = options.cache_map.get(normpath(path, options))
1174    else:
1175        pair = None
1176    if pair is not None:
1177        # The cache map paths were specified relative to the base directory,
        # but the filesystem metastore APIs operate relative to the cache
1179        # prefix directory.
1180        # Solve this by rewriting the paths as relative to the root dir.
1181        # This only makes sense when using the filesystem backed cache.
1182        root = _cache_dir_prefix(options)
1183        return (os.path.relpath(pair[0], root), os.path.relpath(pair[1], root), None)
1184    prefix = os.path.join(*id.split('.'))
1185    is_package = os.path.basename(path).startswith('__init__.py')
1186    if is_package:
1187        prefix = os.path.join(prefix, '__init__')
1188
1189    deps_json = None
1190    if options.cache_fine_grained:
1191        deps_json = prefix + '.deps.json'
1192    return (prefix + '.meta.json', prefix + '.data.json', deps_json)
1193
1194
1195def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]:
1196    """Find cache data for a module.
1197
1198    Args:
1199      id: module ID
1200      path: module path
1201      manager: the build manager (for pyversion, log/trace, and build options)
1202
1203    Returns:
1204      A CacheMeta instance if the cache data was found and appears
1205      valid; otherwise None.
1206    """
1207    # TODO: May need to take more build options into account
1208    meta_json, data_json, _ = get_cache_names(id, path, manager.options)
1209    manager.trace('Looking for {} at {}'.format(id, meta_json))
1210    t0 = time.time()
1211    meta = _load_json_file(meta_json, manager,
1212                           log_success='Meta {} '.format(id),
1213                           log_error='Could not load cache for {}: '.format(id))
1214    t1 = time.time()
1215    if meta is None:
1216        return None
1217    if not isinstance(meta, dict):
1218        manager.log('Could not load cache for {}: meta cache is not a dict: {}'
1219                    .format(id, repr(meta)))
1220        return None
1221    m = cache_meta_from_dict(meta, data_json)
1222    t2 = time.time()
1223    manager.add_stats(load_meta_time=t2 - t0,
1224                      load_meta_load_time=t1 - t0,
1225                      load_meta_from_dict_time=t2 - t1)
1226
1227    # Don't check for path match, that is dealt with in validate_meta().
1228    if (m.id != id or
1229            m.mtime is None or m.size is None or
1230            m.dependencies is None or m.data_mtime is None):
1231        manager.log('Metadata abandoned for {}: attributes are missing'.format(id))
1232        return None
1233
1234    # Ignore cache if generated by an older mypy version.
1235    if ((m.version_id != manager.version_id and not manager.options.skip_version_check)
1236            or m.options is None
1237            or len(m.dependencies) + len(m.suppressed) != len(m.dep_prios)
1238            or len(m.dependencies) + len(m.suppressed) != len(m.dep_lines)):
1239        manager.log('Metadata abandoned for {}: new attributes are missing'.format(id))
1240        return None
1241
1242    # Ignore cache if (relevant) options aren't the same.
1243    # Note that it's fine to mutilate cached_options since it's only used here.
1244    cached_options = m.options
1245    current_options = manager.options.clone_for_module(id).select_options_affecting_cache()
1246    if manager.options.skip_version_check:
1247        # When we're lax about version we're also lax about platform.
1248        cached_options['platform'] = current_options['platform']
1249    if 'debug_cache' in cached_options:
1250        # Older versions included debug_cache, but it's silly to compare it.
1251        del cached_options['debug_cache']
1252    if cached_options != current_options:
1253        manager.log('Metadata abandoned for {}: options differ'.format(id))
1254        if manager.options.verbosity >= 2:
1255            for key in sorted(set(cached_options) | set(current_options)):
1256                if cached_options.get(key) != current_options.get(key):
1257                    manager.trace('    {}: {} != {}'
1258                                  .format(key, cached_options.get(key), current_options.get(key)))
1259        return None
1260    if manager.old_plugins_snapshot and manager.plugins_snapshot:
1261        # Check if plugins are still the same.
1262        if manager.plugins_snapshot != manager.old_plugins_snapshot:
1263            manager.log('Metadata abandoned for {}: plugins differ'.format(id))
1264            return None
    # So that plugins can return config data containing tuples without
    # silently invalidating every module on each run, we round-trip the
    # data through JSON (which converts tuples to lists) before comparing
    # it against the cached value. This isn't beautiful.
1268    plugin_data = json.loads(json.dumps(
1269        manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=True))
1270    ))
1271    if m.plugin_data != plugin_data:
1272        manager.log('Metadata abandoned for {}: plugin configuration differs'.format(id))
1273        return None
1274
1275    manager.add_stats(fresh_metas=1)
1276    return m
1277
1278
1279def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
1280                  ignore_all: bool, manager: BuildManager) -> Optional[CacheMeta]:
    """Check whether the cached AST of this module can be used.

    Returns:
      None, if the cached AST is unusable.
      Original meta, if mtime/size matched.
      Meta with mtime updated to match source file, if hash/size matched but mtime/path didn't.
    """
    # This requires two steps. The first one is obvious: we check that the module source file
    # contents are the same as they were when the cache data file was created. The second one is
    # less obvious: we check that the cache data file mtime has not changed; this is needed
    # because we use the cache data file mtime to propagate information about changes in the
    # dependencies.
1292
1293    if meta is None:
1294        manager.log('Metadata not found for {}'.format(id))
1295        return None
1296
1297    if meta.ignore_all and not ignore_all:
1298        manager.log('Metadata abandoned for {}: errors were previously ignored'.format(id))
1299        return None
1300
1301    t0 = time.time()
1302    bazel = manager.options.bazel
1303    assert path is not None, "Internal error: meta was provided without a path"
1304    if not manager.options.skip_cache_mtime_checks:
1305        # Check data_json; assume if its mtime matches it's good.
1306        # TODO: stat() errors
1307        data_mtime = manager.getmtime(meta.data_json)
1308        if data_mtime != meta.data_mtime:
1309            manager.log('Metadata abandoned for {}: data cache is modified'.format(id))
1310            return None
1311
1312    if bazel:
1313        # Normalize path under bazel to make sure it isn't absolute
1314        path = normpath(path, manager.options)
1315    try:
1316        st = manager.get_stat(path)
1317    except OSError:
1318        return None
1319    if not stat.S_ISREG(st.st_mode):
        manager.log('Metadata abandoned for {}: file {} is not a regular file'.format(id, path))
1321        return None
1322
1323    manager.add_stats(validate_stat_time=time.time() - t0)
1324
1325    # When we are using a fine-grained cache, we want our initial
1326    # build() to load all of the cache information and then do a
1327    # fine-grained incremental update to catch anything that has
1328    # changed since the cache was generated. We *don't* want to do a
1329    # coarse-grained incremental rebuild, so we accept the cache
1330    # metadata even if it doesn't match the source file.
1331    #
1332    # We still *do* the mtime/hash checks, however, to enable
1333    # fine-grained mode to take advantage of the mtime-updating
1334    # optimization when mtimes differ but hashes match.  There is
1335    # essentially no extra time cost to computing the hash here, since
1336    # it will be cached and will be needed for finding changed files
    # later anyway.
1338    fine_grained_cache = manager.use_fine_grained_cache()
1339
1340    size = st.st_size
1341    # Bazel ensures the cache is valid.
1342    if size != meta.size and not bazel and not fine_grained_cache:
1343        manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path))
1344        return None
1345
1346    # Bazel ensures the cache is valid.
1347    mtime = 0 if bazel else int(st.st_mtime)
1348    if not bazel and (mtime != meta.mtime or path != meta.path):
1349        if manager.quickstart_state and path in manager.quickstart_state:
            # If the mtime and the size of the file recorded in the quickstart dump match
1351            # what we see on disk, we know (assume) that the hash matches the quickstart
1352            # data as well. If that hash matches the hash in the metadata, then we know
1353            # the file is up to date even though the mtime is wrong, without needing to hash it.
1354            qmtime, qsize, qhash = manager.quickstart_state[path]
1355            if int(qmtime) == mtime and qsize == size and qhash == meta.hash:
1356                manager.log('Metadata fresh (by quickstart) for {}: file {}'.format(id, path))
1357                meta = meta._replace(mtime=mtime, path=path)
1358                return meta
1359
1360        t0 = time.time()
1361        try:
1362            source_hash = manager.fscache.hash_digest(path)
1363        except (OSError, UnicodeDecodeError, DecodeError):
1364            return None
1365        manager.add_stats(validate_hash_time=time.time() - t0)
1366        if source_hash != meta.hash:
1367            if fine_grained_cache:
1368                manager.log('Using stale metadata for {}: file {}'.format(id, path))
1369                return meta
1370            else:
1371                manager.log('Metadata abandoned for {}: file {} has different hash'.format(
1372                    id, path))
1373                return None
1374        else:
1375            t0 = time.time()
1376            # Optimization: update mtime and path (otherwise, this mismatch will reappear).
1377            meta = meta._replace(mtime=mtime, path=path)
1378            # Construct a dict we can pass to json.dumps() (compare to write_cache()).
1379            meta_dict = {
1380                'id': id,
1381                'path': path,
1382                'mtime': mtime,
1383                'size': size,
1384                'hash': source_hash,
1385                'data_mtime': meta.data_mtime,
1386                'dependencies': meta.dependencies,
1387                'suppressed': meta.suppressed,
1388                'options': (manager.options.clone_for_module(id)
1389                            .select_options_affecting_cache()),
1390                'dep_prios': meta.dep_prios,
1391                'dep_lines': meta.dep_lines,
1392                'interface_hash': meta.interface_hash,
1393                'version_id': manager.version_id,
1394                'ignore_all': meta.ignore_all,
1395                'plugin_data': meta.plugin_data,
1396            }
1397            if manager.options.debug_cache:
1398                meta_str = json.dumps(meta_dict, indent=2, sort_keys=True)
1399            else:
1400                meta_str = json.dumps(meta_dict)
1401            meta_json, _, _ = get_cache_names(id, path, manager.options)
1402            manager.log('Updating mtime for {}: file {}, meta {}, mtime {}'
1403                        .format(id, path, meta_json, meta.mtime))
1404            t1 = time.time()
1405            manager.metastore.write(meta_json, meta_str)  # Ignore errors, just an optimization.
1406            manager.add_stats(validate_update_time=time.time() - t1,
1407                              validate_munging_time=t1 - t0)
1408            return meta
1409
1410    # It's a match on (id, path, size, hash, mtime).
1411    manager.log('Metadata fresh for {}: file {}'.format(id, path))
1412    return meta
1413
1414
1415def compute_hash(text: str) -> str:
1416    # We use a crypto hash instead of the builtin hash(...) function
1417    # because the output of hash(...)  can differ between runs due to
1418    # hash randomization (enabled by default in Python 3.3).  See the
1419    # note in
1420    # https://docs.python.org/3/reference/datamodel.html#object.__hash__.
1421    return hash_digest(text.encode('utf-8'))
1422
1423
def json_dumps(obj: Any, debug_cache: bool) -> str:
    """Serialize obj to a JSON string, pretty-printed if debug_cache is set."""
    if debug_cache:
        return json.dumps(obj, indent=2, sort_keys=True)
    else:
        return json.dumps(obj, sort_keys=True)
1429
1430
1431def write_cache(id: str, path: str, tree: MypyFile,
1432                dependencies: List[str], suppressed: List[str],
1433                dep_prios: List[int], dep_lines: List[int],
1434                old_interface_hash: str, source_hash: str,
1435                ignore_all: bool, manager: BuildManager) -> Tuple[str, Optional[CacheMeta]]:
1436    """Write cache files for a module.
1437
    Note that mypy's behavior is still correct even when any given
    write_cache() call is replaced with a no-op, so error handling
    code that bails without writing anything is okay.
1441
1442    Args:
1443      id: module ID
1444      path: module path
1445      tree: the fully checked module data
1446      dependencies: module IDs on which this module depends
1447      suppressed: module IDs which were suppressed as dependencies
1448      dep_prios: priorities (parallel array to dependencies)
1449      dep_lines: import line locations (parallel array to dependencies)
1450      old_interface_hash: the hash from the previous version of the data cache file
1451      source_hash: the hash of the source code
1452      ignore_all: the ignore_all flag for this module
1453      manager: the build manager (for pyversion, log/trace)
1454
1455    Returns:
1456      A tuple containing the interface hash and CacheMeta
1457      corresponding to the metadata that was written (the latter may
1458      be None if the cache could not be written).
1459    """
1460    metastore = manager.metastore
1461    # For Bazel we use relative paths and zero mtimes.
1462    bazel = manager.options.bazel
1463
1464    # Obtain file paths.
1465    meta_json, data_json, _ = get_cache_names(id, path, manager.options)
1466    manager.log('Writing {} {} {} {}'.format(
1467        id, path, meta_json, data_json))
1468
1469    # Update tree.path so that in bazel mode it's made relative (since
1470    # sometimes paths leak out).
1471    if bazel:
1472        tree.path = path
1473
1474    # Serialize data and analyze interface
1475    data = tree.serialize()
1476    data_str = json_dumps(data, manager.options.debug_cache)
1477    interface_hash = compute_hash(data_str)
1478
1479    plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))
1480
1481    # Obtain and set up metadata
1482    try:
1483        st = manager.get_stat(path)
1484    except OSError as err:
1485        manager.log("Cannot get stat for {}: {}".format(path, err))
1486        # Remove apparently-invalid cache files.
1487        # (This is purely an optimization.)
1488        for filename in [data_json, meta_json]:
1489            try:
1490                os.remove(filename)
1491            except OSError:
1492                pass
1493        # Still return the interface hash we computed.
1494        return interface_hash, None
1495
1496    # Write data cache file, if applicable
1497    # Note that for Bazel we don't record the data file's mtime.
1498    if old_interface_hash == interface_hash:
1499        # If the interface is unchanged, the cached data is guaranteed
1500        # to be equivalent, and we only need to update the metadata.
1501        data_mtime = manager.getmtime(data_json)
1502        manager.trace("Interface for {} is unchanged".format(id))
1503    else:
1504        manager.trace("Interface for {} has changed".format(id))
1505        if not metastore.write(data_json, data_str):
1506            # Most likely the error is the replace() call
1507            # (see https://github.com/python/mypy/issues/3215).
1508            manager.log("Error writing data JSON file {}".format(data_json))
1509            # Let's continue without writing the meta file.  Analysis:
1510            # If the replace failed, we've changed nothing except left
1511            # behind an extraneous temporary file; if the replace
1512            # worked but the getmtime() call failed, the meta file
1513            # will be considered invalid on the next run because the
1514            # data_mtime field won't match the data file's mtime.
1515            # Both have the effect of slowing down the next run a
1516            # little bit due to an out-of-date cache file.
1517            return interface_hash, None
1518        data_mtime = manager.getmtime(data_json)
1519
1520    mtime = 0 if bazel else int(st.st_mtime)
1521    size = st.st_size
1522    # Note that the options we store in the cache are the options as
1523    # specified by the command line/config file and *don't* reflect
1524    # updates made by inline config directives in the file. This is
1525    # important, or otherwise the options would never match when
1526    # verifying the cache.
1527    options = manager.options.clone_for_module(id)
1528    assert source_hash is not None
1529    meta = {'id': id,
1530            'path': path,
1531            'mtime': mtime,
1532            'size': size,
1533            'hash': source_hash,
1534            'data_mtime': data_mtime,
1535            'dependencies': dependencies,
1536            'suppressed': suppressed,
1537            'options': options.select_options_affecting_cache(),
1538            'dep_prios': dep_prios,
1539            'dep_lines': dep_lines,
1540            'interface_hash': interface_hash,
1541            'version_id': manager.version_id,
1542            'ignore_all': ignore_all,
1543            'plugin_data': plugin_data,
1544            }
1545
1546    # Write meta cache file
1547    meta_str = json_dumps(meta, manager.options.debug_cache)
1548    if not metastore.write(meta_json, meta_str):
1549        # Most likely the error is the replace() call
1550        # (see https://github.com/python/mypy/issues/3215).
1551        # The next run will simply find the cache entry out of date.
1552        manager.log("Error writing meta JSON file {}".format(meta_json))
1553
1554    return interface_hash, cache_meta_from_dict(meta, data_json)
1555
1556
1557def delete_cache(id: str, path: str, manager: BuildManager) -> None:
1558    """Delete cache files for a module.
1559
1560    The cache files for a module are deleted when mypy finds errors there.
1561    This avoids inconsistent states with cache files from different mypy runs,
1562    see #4043 for an example.
1563    """
1564    # We don't delete .deps files on errors, since the dependencies
1565    # are mostly generated from other files and the metadata is
1566    # tracked separately.
1567    meta_path, data_path, _ = get_cache_names(id, path, manager.options)
1568    cache_paths = [meta_path, data_path]
1569    manager.log('Deleting {} {} {}'.format(id, path, " ".join(x for x in cache_paths if x)))
1570
1571    for filename in cache_paths:
1572        try:
1573            manager.metastore.remove(filename)
1574        except OSError as e:
1575            if e.errno != errno.ENOENT:
1576                manager.log("Error deleting cache file {}: {}".format(filename, e.strerror))
1577
1578
1579"""Dependency manager.
1580
1581Design
1582======
1583
1584Ideally
1585-------
1586
1587A. Collapse cycles (each SCC -- strongly connected component --
1588   becomes one "supernode").
1589
1590B. Topologically sort nodes based on dependencies.
1591
1592C. Process from leaves towards roots.
1593
1594Wrinkles
1595--------
1596
1597a. Need to parse source modules to determine dependencies.
1598
1599b. Processing order for modules within an SCC.
1600
c. Must compare mtimes of files to decide whether to re-process; this
   depends on the clock never being set back.
1603
d. from P import M: must check the filesystem for whether module P.M
   exists.
1606
1607e. Race conditions, where somebody modifies a file while we're
1608   processing. Solved by using a FileSystemCache.
1609
1610
1611Steps
1612-----
1613
16141. For each explicitly given module find the source file location.
1615
16162. For each such module load and check the cache metadata, and decide
1617   whether it's valid.
1618
16193. Now recursively (or iteratively) find dependencies and add those to
1620   the graph:
1621
1622   - for cached nodes use the list of dependencies from the cache
1623     metadata (this will be valid even if we later end up re-parsing
1624     the same source);
1625
1626   - for uncached nodes parse the file and process all imports found,
1627     taking care of (a) above.
1628
1629Step 3 should also address (d) above.
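
As an illustrative sketch only (the helper names below are hypothetical,
not functions defined in this module), step 3 is essentially a worklist
traversal over module IDs:

    def build_dependency_graph(roots, load_valid_meta, parse_dependencies):
        # load_valid_meta(id) returns the cached dependency list if the
        # cache metadata for `id` is present and valid, else None;
        # parse_dependencies(id) parses the source and extracts imports.
        graph = {}
        worklist = list(roots)
        while worklist:
            mod = worklist.pop()
            if mod in graph:
                continue
            cached = load_valid_meta(mod)          # step 2
            deps = cached if cached is not None else parse_dependencies(mod)
            graph[mod] = deps
            worklist.extend(dep for dep in deps if dep not in graph)
        return graph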
1630
1631Once step 3 terminates we have the entire dependency graph, and for
1632each module we've either loaded the cache metadata or parsed the
1633source code.  (However, we may still need to parse those modules for
1634which we have cache metadata but that depend, directly or indirectly,
1635on at least one module for which the cache metadata is stale.)
1636
1637Now we can execute steps A-C from the first section.  Finding SCCs for
1638step A shouldn't be hard; there's a recipe here:
1639http://code.activestate.com/recipes/578507/.  There's also a plethora
1640of topsort recipes, e.g. http://code.activestate.com/recipes/577413/.
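
As an illustration of step A (this is *not* the code used by this module,
just a minimal sketch of Tarjan's algorithm), components come out with
dependencies before dependents, which is already the leaves-to-roots
order that step C wants:

    def find_strongly_connected_components(vertices, edges):
        # `edges` maps each vertex to the vertices it depends on.
        index = {}     # vertex -> discovery index
        lowlink = {}   # vertex -> lowest index reachable through the stack
        stack, on_stack, sccs = [], set(), []
        counter = [0]

        def visit(v):
            index[v] = lowlink[v] = counter[0]
            counter[0] += 1
            stack.append(v)
            on_stack.add(v)
            for w in edges[v]:
                if w not in index:
                    visit(w)
                    lowlink[v] = min(lowlink[v], lowlink[w])
                elif w in on_stack:
                    lowlink[v] = min(lowlink[v], index[w])
            if lowlink[v] == index[v]:
                # v is the root of a component; pop the component off the stack.
                scc = set()
                while True:
                    w = stack.pop()
                    on_stack.discard(w)
                    scc.add(w)
                    if w == v:
                        break
                sccs.append(frozenset(scc))

        for v in vertices:
            if v not in index:
                visit(v)
        return sccs

(The sketch is recursive for brevity; a production version would be
iterative to avoid hitting the recursion limit on deep import chains.)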
1641
1642For single nodes, processing is simple.  If the node was cached, we
1643deserialize the cache data and fix up cross-references.  Otherwise, we
1644do semantic analysis followed by type checking.  We also handle (c)
1645above; if a module has valid cache data *but* any of its
1646dependencies was processed from source, then the module should be
1647processed from source.
1648
1649A relatively simple optimization (outside SCCs) we might do in the
1650future is as follows: if a node's cache data is valid, but one or more
1651of its dependencies are out of date so we have to re-parse the node
1652from source, once we have fully type-checked the node, we can decide
1653whether its symbol table actually changed compared to the cache data
1654(by reading the cache data and comparing it to the data we would be
1655writing).  If there is no change we can declare the node up to date,
1656and any node that depends (and for which we have cached data, and
1657whose other dependencies are up to date) on it won't need to be
1658re-parsed from source.
1659
1660Import cycles
1661-------------
1662
Finally we have to decide how to handle (b), import cycles.  Here
1664we'll need a modified version of the original state machine
1665(build.py), but we only need to do this per SCC, and we won't have to
1666deal with changes to the list of nodes while we're processing it.
1667
1668If all nodes in the SCC have valid cache metadata and all dependencies
1669outside the SCC are still valid, we can proceed as follows:
1670
1671  1. Load cache data for all nodes in the SCC.
1672
1673  2. Fix up cross-references for all nodes in the SCC.
1674
1675Otherwise, the simplest (but potentially slow) way to proceed is to
1676invalidate all cache data in the SCC and re-parse all nodes in the SCC
1677from source.  We can do this as follows:
1678
1679  1. Parse source for all nodes in the SCC.
1680
1681  2. Semantic analysis for all nodes in the SCC.
1682
1683  3. Type check all nodes in the SCC.
1684
1685(If there are more passes the process is the same -- each pass should
1686be done for all nodes before starting the next pass for any nodes in
1687the SCC.)
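
A sketch of that shape, using hypothetical per-module methods rather than
the actual State API defined below:

    def process_scc_in_passes(scc, graph):
        # Each pass runs over every module in the SCC before the next
        # pass starts; parse/semantic_analysis/type_check are stand-ins
        # for the real per-module steps.
        for id in scc:
            graph[id].parse()
        for id in scc:
            graph[id].semantic_analysis()
        for id in scc:
            graph[id].type_check()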
1688
1689We could process the nodes in the SCC in any order.  For sentimental
1690reasons, I've decided to process them in the reverse order in which we
1691encountered them when originally constructing the graph.  That's how
1692the old build.py deals with cycles, and at least this reproduces the
1693previous implementation more accurately.
1694
1695Can we do better than re-parsing all nodes in the SCC when any of its
1696dependencies are out of date?  It's doubtful.  The optimization
1697mentioned at the end of the previous section would require re-parsing
1698and type-checking a node and then comparing its symbol table to the
1699cached data; but because the node is part of a cycle we can't
1700technically type-check it until the semantic analysis of all other
1701nodes in the cycle has completed.  (This is an important issue because
1702Dropbox has a very large cycle in production code.  But I'd like to
1703deal with it later.)
1704
1705Additional wrinkles
1706-------------------
1707
1708During implementation more wrinkles were found.
1709
1710- When a submodule of a package (e.g. x.y) is encountered, the parent
1711  package (e.g. x) must also be loaded, but it is not strictly a
1712  dependency.  See State.add_ancestors() below.
1713"""
1714
1715
1716class ModuleNotFound(Exception):
1717    """Control flow exception to signal that a module was not found."""
1718
1719
1720class State:
1721    """The state for a module.
1722
1723    The source is only used for the -c command line option; in that
1724    case path is None.  Otherwise source is None and path isn't.
1725    """
1726
1727    manager = None  # type: BuildManager
1728    order_counter = 0  # type: ClassVar[int]
1729    order = None  # type: int  # Order in which modules were encountered
1730    id = None  # type: str  # Fully qualified module name
1731    path = None  # type: Optional[str]  # Path to module source
1732    abspath = None  # type: Optional[str]  # Absolute path to module source
1733    xpath = None  # type: str  # Path or '<string>'
1734    source = None  # type: Optional[str]  # Module source code
1735    source_hash = None  # type: Optional[str]  # Hash calculated based on the source code
1736    meta_source_hash = None  # type: Optional[str]  # Hash of the source given in the meta, if any
1737    meta = None  # type: Optional[CacheMeta]
1738    data = None  # type: Optional[str]
1739    tree = None  # type: Optional[MypyFile]
    # We keep both a list and a set of dependencies: the set makes it efficient to
    # prevent duplicates, and the list preserves the order in which dependencies were
    # encountered (we don't want to change the iteration order).
1743    # They should be managed with add_dependency and suppress_dependency.
1744    dependencies = None  # type: List[str]  # Modules directly imported by the module
1745    dependencies_set = None  # type: Set[str]  # The same but as a set for deduplication purposes
1746    suppressed = None  # type: List[str]  # Suppressed/missing dependencies
1747    suppressed_set = None  # type: Set[str]  # Suppressed/missing dependencies
1748    priorities = None  # type: Dict[str, int]
1749
1750    # Map each dependency to the line number where it is first imported
1751    dep_line_map = None  # type: Dict[str, int]
1752
1753    # Parent package, its parent, etc.
1754    ancestors = None  # type: Optional[List[str]]
1755
1756    # List of (path, line number) tuples giving context for import
1757    import_context = None  # type: List[Tuple[str, int]]
1758
1759    # The State from which this module was imported, if any
1760    caller_state = None  # type: Optional[State]
1761
1762    # If caller_state is set, the line number in the caller where the import occurred
1763    caller_line = 0
1764
1765    # If True, indicate that the public interface of this module is unchanged
1766    externally_same = True
1767
1768    # Contains a hash of the public interface in incremental mode
1769    interface_hash = ""  # type: str
1770
1771    # Options, specialized for this file
1772    options = None  # type: Options
1773
1774    # Whether to ignore all errors
1775    ignore_all = False
1776
1777    # Whether the module has an error or any of its dependencies have one.
1778    transitive_error = False
1779
1780    # Errors reported before semantic analysis, to allow fine-grained
1781    # mode to keep reporting them.
1782    early_errors = None  # type: List[ErrorInfo]
1783
1784    # Type checker used for checking this file.  Use type_checker() for
1785    # access and to construct this on demand.
1786    _type_checker = None  # type: Optional[TypeChecker]
1787
1788    fine_grained_deps_loaded = False
1789
1790    def __init__(self,
1791                 id: Optional[str],
1792                 path: Optional[str],
1793                 source: Optional[str],
1794                 manager: BuildManager,
1795                 caller_state: 'Optional[State]' = None,
1796                 caller_line: int = 0,
1797                 ancestor_for: 'Optional[State]' = None,
1798                 root_source: bool = False,
1799                 # If `temporary` is True, this State is being created to just
1800                 # quickly parse/load the tree, without an intention to further
1801                 # process it. With this flag, any changes to external state as well
1802                 # as error reporting should be avoided.
1803                 temporary: bool = False,
1804                 ) -> None:
1805        if not temporary:
1806            assert id or path or source is not None, "Neither id, path nor source given"
1807        self.manager = manager
1808        State.order_counter += 1
1809        self.order = State.order_counter
1810        self.caller_state = caller_state
1811        self.caller_line = caller_line
1812        if caller_state:
1813            self.import_context = caller_state.import_context[:]
1814            self.import_context.append((caller_state.xpath, caller_line))
1815        else:
1816            self.import_context = []
1817        self.id = id or '__main__'
1818        self.options = manager.options.clone_for_module(self.id)
1819        self.early_errors = []
1820        self._type_checker = None
1821        if not path and source is None:
1822            assert id is not None
1823            try:
1824                path, follow_imports = find_module_and_diagnose(
1825                    manager, id, self.options, caller_state, caller_line,
1826                    ancestor_for, root_source, skip_diagnose=temporary)
1827            except ModuleNotFound:
1828                if not temporary:
1829                    manager.missing_modules.add(id)
1830                raise
1831            if follow_imports == 'silent':
1832                self.ignore_all = True
1833        self.path = path
1834        if path:
1835            self.abspath = os.path.abspath(path)
1836        self.xpath = path or '<string>'
1837        if path and source is None and self.manager.fscache.isdir(path):
1838            source = ''
1839        self.source = source
1840        if path and source is None and self.manager.cache_enabled:
1841            self.meta = find_cache_meta(self.id, path, manager)
1842            # TODO: Get mtime if not cached.
1843            if self.meta is not None:
1844                self.interface_hash = self.meta.interface_hash
1845                self.meta_source_hash = self.meta.hash
1846        self.add_ancestors()
1847        t0 = time.time()
1848        self.meta = validate_meta(self.meta, self.id, self.path, self.ignore_all, manager)
1849        self.manager.add_stats(validate_meta_time=time.time() - t0)
1850        if self.meta:
1851            # Make copies, since we may modify these and want to
1852            # compare them to the originals later.
1853            self.dependencies = list(self.meta.dependencies)
1854            self.dependencies_set = set(self.dependencies)
1855            self.suppressed = list(self.meta.suppressed)
1856            self.suppressed_set = set(self.suppressed)
1857            all_deps = self.dependencies + self.suppressed
1858            assert len(all_deps) == len(self.meta.dep_prios)
1859            self.priorities = {id: pri
1860                               for id, pri in zip(all_deps, self.meta.dep_prios)}
1861            assert len(all_deps) == len(self.meta.dep_lines)
1862            self.dep_line_map = {id: line
1863                                 for id, line in zip(all_deps, self.meta.dep_lines)}
1864            if temporary:
1865                self.load_tree(temporary=True)
1866            if not manager.use_fine_grained_cache():
                # Special case: if a package that was previously missing is imported here
                # and has since become present, then we need to re-calculate dependencies.
                # This is to support patterns like this:
                #     from missing_package import missing_module  # type: ignore
                # At first mypy doesn't know that `missing_module` is a module
                # (it may be a variable, a class, or a function), so it is not added to
                # suppressed dependencies. Therefore, when the package containing the module
                # is added, we need to re-calculate dependencies.
1875                # NOTE: see comment below for why we skip this in fine grained mode.
1876                if exist_added_packages(self.suppressed, manager, self.options):
1877                    self.parse_file()  # This is safe because the cache is anyway stale.
1878                    self.compute_dependencies()
1879        else:
1880            # When doing a fine-grained cache load, pretend we only
1881            # know about modules that have cache information and defer
1882            # handling new modules until the fine-grained update.
1883            if manager.use_fine_grained_cache():
1884                manager.log("Deferring module to fine-grained update %s (%s)" % (path, id))
1885                raise ModuleNotFound
1886
1887            # Parse the file (and then some) to get the dependencies.
1888            self.parse_file()
1889            self.compute_dependencies()
1890
1891    @property
1892    def xmeta(self) -> CacheMeta:
1893        assert self.meta, "missing meta on allegedly fresh module"
1894        return self.meta
1895
1896    def add_ancestors(self) -> None:
1897        if self.path is not None:
1898            _, name = os.path.split(self.path)
1899            base, _ = os.path.splitext(name)
1900            if '.' in base:
                # This is just a weird filename; don't add anything.
1902                self.ancestors = []
1903                return
1904        # All parent packages are new ancestors.
1905        ancestors = []
1906        parent = self.id
1907        while '.' in parent:
1908            parent, _ = parent.rsplit('.', 1)
1909            ancestors.append(parent)
1910        self.ancestors = ancestors
1911
1912    def is_fresh(self) -> bool:
1913        """Return whether the cache data for this file is fresh."""
1914        # NOTE: self.dependencies may differ from
1915        # self.meta.dependencies when a dependency is dropped due to
1916        # suppression by silent mode.  However when a suppressed
1917        # dependency is added back we find out later in the process.
1918        return (self.meta is not None
1919                and self.is_interface_fresh()
1920                and self.dependencies == self.meta.dependencies)
1921
1922    def is_interface_fresh(self) -> bool:
1923        return self.externally_same
1924
1925    def mark_as_rechecked(self) -> None:
1926        """Marks this module as having been fully re-analyzed by the type-checker."""
1927        self.manager.rechecked_modules.add(self.id)
1928
1929    def mark_interface_stale(self, *, on_errors: bool = False) -> None:
1930        """Marks this module as having a stale public interface, and discards the cache data."""
1931        self.externally_same = False
1932        if not on_errors:
1933            self.manager.stale_modules.add(self.id)
1934
1935    def check_blockers(self) -> None:
1936        """Raise CompileError if a blocking error is detected."""
1937        if self.manager.errors.is_blockers():
1938            self.manager.log("Bailing due to blocking errors")
1939            self.manager.errors.raise_error()
1940
1941    @contextlib.contextmanager
1942    def wrap_context(self, check_blockers: bool = True) -> Iterator[None]:
1943        """Temporarily change the error import context to match this state.
1944
1945        Also report an internal error if an unexpected exception was raised
1946        and raise an exception on a blocking error, unless
1947        check_blockers is False. Skipping blocking error reporting is used
1948        in the semantic analyzer so that we can report all blocking errors
1949        for a file (across multiple targets) to maintain backward
1950        compatibility.
1951        """
1952        save_import_context = self.manager.errors.import_context()
1953        self.manager.errors.set_import_context(self.import_context)
1954        try:
1955            yield
1956        except CompileError:
1957            raise
1958        except Exception as err:
1959            report_internal_error(err, self.path, 0, self.manager.errors,
1960                                  self.options, self.manager.stdout, self.manager.stderr)
1961        self.manager.errors.set_import_context(save_import_context)
1962        # TODO: Move this away once we've removed the old semantic analyzer?
1963        if check_blockers:
1964            self.check_blockers()
1965
1966    def load_fine_grained_deps(self) -> Dict[str, Set[str]]:
1967        return self.manager.load_fine_grained_deps(self.id)
1968
1969    def load_tree(self, temporary: bool = False) -> None:
1970        assert self.meta is not None, "Internal error: this method must be called only" \
1971                                      " for cached modules"
1972        t0 = time.time()
1973        raw = self.manager.metastore.read(self.meta.data_json)
1974        t1 = time.time()
1975        data = json.loads(raw)
1976        t2 = time.time()
1977        # TODO: Assert data file wasn't changed.
1978        self.tree = MypyFile.deserialize(data)
1979        t3 = time.time()
1980        self.manager.add_stats(data_read_time=t1 - t0,
1981                               data_json_load_time=t2 - t1,
1982                               deserialize_time=t3 - t2)
1983        if not temporary:
1984            self.manager.modules[self.id] = self.tree
1985            self.manager.add_stats(fresh_trees=1)
1986
1987    def fix_cross_refs(self) -> None:
1988        assert self.tree is not None, "Internal error: method must be called on parsed file only"
        # We need to set allow_missing when doing a fine-grained cache
1990        # load because we need to gracefully handle missing modules.
1991        fixup_module(self.tree, self.manager.modules,
1992                     self.options.use_fine_grained_cache)
1993
1994    # Methods for processing modules from source code.
1995
1996    def parse_file(self) -> None:
1997        """Parse file and run first pass of semantic analysis.
1998
1999        Everything done here is local to the file. Don't depend on imported
2000        modules in any way. Also record module dependencies based on imports.
2001        """
2002        if self.tree is not None:
2003            # The file was already parsed (in __init__()).
2004            return
2005
2006        manager = self.manager
2007
        # Can we reuse a previously parsed AST? This avoids redundant work in the daemon.
2009        cached = self.id in manager.ast_cache
2010        modules = manager.modules
2011        if not cached:
2012            manager.log("Parsing %s (%s)" % (self.xpath, self.id))
2013        else:
2014            manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id))
2015
2016        with self.wrap_context():
2017            source = self.source
2018            self.source = None  # We won't need it again.
2019            if self.path and source is None:
2020                try:
2021                    path = manager.maybe_swap_for_shadow_path(self.path)
2022                    source = decode_python_encoding(manager.fscache.read(path),
2023                                                    manager.options.python_version)
2024                    self.source_hash = manager.fscache.hash_digest(path)
2025                except IOError as ioerr:
2026                    # ioerr.strerror differs for os.stat failures between Windows and
2027                    # other systems, but os.strerror(ioerr.errno) does not, so we use that.
2028                    # (We want the error messages to be platform-independent so that the
2029                    # tests have predictable output.)
2030                    raise CompileError([
2031                        "mypy: can't read file '{}': {}".format(
2032                            self.path, os.strerror(ioerr.errno))],
2033                        module_with_blocker=self.id) from ioerr
2034                except (UnicodeDecodeError, DecodeError) as decodeerr:
2035                    if self.path.endswith('.pyd'):
2036                        err = "mypy: stubgen does not support .pyd files: '{}'".format(self.path)
2037                    else:
2038                        err = "mypy: can't decode file '{}': {}".format(self.path, str(decodeerr))
2039                    raise CompileError([err], module_with_blocker=self.id) from decodeerr
2040            else:
2041                assert source is not None
2042                self.source_hash = compute_hash(source)
2043
2044            self.parse_inline_configuration(source)
2045            if not cached:
2046                self.tree = manager.parse_file(self.id, self.xpath, source,
2047                                               self.ignore_all or self.options.ignore_errors,
2048                                               self.options)
2049
2050            else:
2051                # Reuse a cached AST
2052                self.tree = manager.ast_cache[self.id][0]
2053                manager.errors.set_file_ignored_lines(
2054                    self.xpath,
2055                    self.tree.ignored_lines,
2056                    self.ignore_all or self.options.ignore_errors)
2057
2058        if not cached:
2059            # Make a copy of any errors produced during parse time so that
2060            # fine-grained mode can repeat them when the module is
2061            # reprocessed.
2062            self.early_errors = list(manager.errors.error_info_map.get(self.xpath, []))
2063        else:
2064            self.early_errors = manager.ast_cache[self.id][1]
2065
2066        modules[self.id] = self.tree
2067
2068        if not cached:
2069            self.semantic_analysis_pass1()
2070
2071        self.check_blockers()
2072
2073        manager.ast_cache[self.id] = (self.tree, self.early_errors)
2074
2075    def parse_inline_configuration(self, source: str) -> None:
2076        """Check for inline mypy: options directive and parse them."""
2077        flags = get_mypy_comments(source)
2078        if flags:
2079            changes, config_errors = parse_mypy_comments(flags, self.options)
2080            self.options = self.options.apply_changes(changes)
2081            self.manager.errors.set_file(self.xpath, self.id)
2082            for lineno, error in config_errors:
2083                self.manager.errors.report(lineno, 0, error)
2084
2085    def semantic_analysis_pass1(self) -> None:
2086        """Perform pass 1 of semantic analysis, which happens immediately after parsing.
2087
2088        This pass can't assume that any other modules have been processed yet.
2089        """
2090        options = self.options
2091        assert self.tree is not None
2092        # Do the first pass of semantic analysis: analyze the reachability
2093        # of blocks and import statements. We must do this before
2094        # processing imports, since this may mark some import statements as
2095        # unreachable.
2096        #
2097        # TODO: This should not be considered as a semantic analysis
2098        #     pass -- it's an independent pass.
2099        analyzer = SemanticAnalyzerPreAnalysis()
2100        with self.wrap_context():
2101            analyzer.visit_file(self.tree, self.xpath, self.id, options)
2102        # TODO: Do this while constructing the AST?
2103        self.tree.names = SymbolTable()
2104        if options.allow_redefinition:
2105            # Perform renaming across the AST to allow variable redefinitions
2106            self.tree.accept(VariableRenameVisitor())
2107
2108    def add_dependency(self, dep: str) -> None:
2109        if dep not in self.dependencies_set:
2110            self.dependencies.append(dep)
2111            self.dependencies_set.add(dep)
2112        if dep in self.suppressed_set:
2113            self.suppressed.remove(dep)
2114            self.suppressed_set.remove(dep)
2115
2116    def suppress_dependency(self, dep: str) -> None:
2117        if dep in self.dependencies_set:
2118            self.dependencies.remove(dep)
2119            self.dependencies_set.remove(dep)
2120        if dep not in self.suppressed_set:
2121            self.suppressed.append(dep)
2122            self.suppressed_set.add(dep)
2123
2124    def compute_dependencies(self) -> None:
2125        """Compute a module's dependencies after parsing it.
2126
2127        This is used when we parse a file that we didn't have
2128        up-to-date cache information for. When we have an up-to-date
2129        cache, we just use the cached info.
2130        """
2131        manager = self.manager
2132        assert self.tree is not None
2133
2134        # Compute (direct) dependencies.
2135        # Add all direct imports (this is why we needed the first pass).
2136        # Also keep track of each dependency's source line.
2137        # Missing dependencies will be moved from dependencies to
2138        # suppressed when they fail to be loaded in load_graph.
2139
2140        self.dependencies = []
2141        self.dependencies_set = set()
2142        self.suppressed = []
2143        self.suppressed_set = set()
2144        self.priorities = {}  # id -> priority
2145        self.dep_line_map = {}  # id -> line
2146        dep_entries = (manager.all_imported_modules_in_file(self.tree) +
2147                       self.manager.plugin.get_additional_deps(self.tree))
2148        for pri, id, line in dep_entries:
2149            self.priorities[id] = min(pri, self.priorities.get(id, PRI_ALL))
2150            if id == self.id:
2151                continue
2152            self.add_dependency(id)
2153            if id not in self.dep_line_map:
2154                self.dep_line_map[id] = line
2155        # Every module implicitly depends on builtins.
2156        if self.id != 'builtins':
2157            self.add_dependency('builtins')
2158
2159        self.check_blockers()  # Can fail due to bogus relative imports
2160
2161    def type_check_first_pass(self) -> None:
2162        if self.options.semantic_analysis_only:
2163            return
2164        with self.wrap_context():
2165            self.type_checker().check_first_pass()
2166
2167    def type_checker(self) -> TypeChecker:
2168        if not self._type_checker:
2169            assert self.tree is not None, "Internal error: must be called on parsed file only"
2170            manager = self.manager
2171            self._type_checker = TypeChecker(manager.errors, manager.modules, self.options,
2172                                             self.tree, self.xpath, manager.plugin)
2173        return self._type_checker
2174
2175    def type_map(self) -> Dict[Expression, Type]:
2176        return self.type_checker().type_map
2177
2178    def type_check_second_pass(self) -> bool:
2179        if self.options.semantic_analysis_only:
2180            return False
2181        with self.wrap_context():
2182            return self.type_checker().check_second_pass()
2183
2184    def finish_passes(self) -> None:
2185        assert self.tree is not None, "Internal error: method must be called on parsed file only"
2186        manager = self.manager
2187        if self.options.semantic_analysis_only:
2188            return
2189        with self.wrap_context():
2190            # Some tests (and tools) want to look at the set of all types.
2191            options = manager.options
2192            if options.export_types:
2193                manager.all_types.update(self.type_map())
2194
2195            # We should always patch indirect dependencies, even in full (non-incremental) builds,
2196            # because the cache still may be written, and it must be correct.
2197            self._patch_indirect_dependencies(self.type_checker().module_refs, self.type_map())
2198
2199            if self.options.dump_inference_stats:
2200                dump_type_stats(self.tree,
2201                                self.xpath,
2202                                modules=self.manager.modules,
2203                                inferred=True,
2204                                typemap=self.type_map())
2205            manager.report_file(self.tree, self.type_map(), self.options)
2206
2207            self.update_fine_grained_deps(self.manager.fg_deps)
2208            self.free_state()
2209            if not manager.options.fine_grained_incremental and not manager.options.preserve_asts:
2210                free_tree(self.tree)
2211
2212    def free_state(self) -> None:
2213        if self._type_checker:
2214            self._type_checker.reset()
2215            self._type_checker = None
2216
2217    def _patch_indirect_dependencies(self,
2218                                     module_refs: Set[str],
2219                                     type_map: Dict[Expression, Type]) -> None:
2220        types = set(type_map.values())
2221        assert None not in types
2222        valid = self.valid_references()
2223
2224        encountered = self.manager.indirection_detector.find_modules(types) | module_refs
2225        extra = encountered - valid
2226
2227        for dep in sorted(extra):
2228            if dep not in self.manager.modules:
2229                continue
2230            if dep not in self.suppressed_set and dep not in self.manager.missing_modules:
2231                self.add_dependency(dep)
2232                self.priorities[dep] = PRI_INDIRECT
2233            elif dep not in self.suppressed_set and dep in self.manager.missing_modules:
2234                self.suppress_dependency(dep)
2235
2236    def compute_fine_grained_deps(self) -> Dict[str, Set[str]]:
2237        assert self.tree is not None
2238        if self.id in ('builtins', 'typing', 'types', 'sys', '_typeshed'):
2239            # We don't track changes to core parts of typeshed -- the
2240            # assumption is that they are only changed as part of mypy
2241            # updates, which will invalidate everything anyway. These
2242            # will always be processed in the initial non-fine-grained
            # build. Other modules may be brought in as a result of a
2244            # fine-grained increment, and we may need these
2245            # dependencies then to handle cyclic imports.
2246            return {}
2247        from mypy.server.deps import get_dependencies  # Lazy import to speed up startup
2248        return get_dependencies(target=self.tree,
2249                                type_map=self.type_map(),
2250                                python_version=self.options.python_version,
2251                                options=self.manager.options)
2252
2253    def update_fine_grained_deps(self, deps: Dict[str, Set[str]]) -> None:
2254        options = self.manager.options
2255        if options.cache_fine_grained or options.fine_grained_incremental:
2256            from mypy.server.deps import merge_dependencies  # Lazy import to speed up startup
2257            merge_dependencies(self.compute_fine_grained_deps(), deps)
2258            TypeState.update_protocol_deps(deps)
2259
2260    def valid_references(self) -> Set[str]:
2261        assert self.ancestors is not None
2262        valid_refs = set(self.dependencies + self.suppressed + self.ancestors)
2263        valid_refs.add(self.id)
2264
2265        if "os" in valid_refs:
2266            valid_refs.add("os.path")
2267
2268        return valid_refs
2269
2270    def write_cache(self) -> None:
2271        assert self.tree is not None, "Internal error: method must be called on parsed file only"
2272        # We don't support writing cache files in fine-grained incremental mode.
2273        if (not self.path
2274                or self.options.cache_dir == os.devnull
2275                or self.options.fine_grained_incremental):
2276            return
2277        is_errors = self.transitive_error
2278        if is_errors:
2279            delete_cache(self.id, self.path, self.manager)
2280            self.meta = None
2281            self.mark_interface_stale(on_errors=True)
2282            return
2283        dep_prios = self.dependency_priorities()
2284        dep_lines = self.dependency_lines()
2285        assert self.source_hash is not None
2286        assert len(set(self.dependencies)) == len(self.dependencies), (
2287            "Duplicates in dependencies list for {} ({})".format(self.id, self.dependencies))
2288        new_interface_hash, self.meta = write_cache(
2289            self.id, self.path, self.tree,
2290            list(self.dependencies), list(self.suppressed),
2291            dep_prios, dep_lines, self.interface_hash, self.source_hash, self.ignore_all,
2292            self.manager)
2293        if new_interface_hash == self.interface_hash:
2294            self.manager.log("Cached module {} has same interface".format(self.id))
2295        else:
2296            self.manager.log("Cached module {} has changed interface".format(self.id))
2297            self.mark_interface_stale()
2298            self.interface_hash = new_interface_hash
2299
2300    def verify_dependencies(self, suppressed_only: bool = False) -> None:
2301        """Report errors for import targets in modules that don't exist.
2302
2303        If suppressed_only is set, only check suppressed dependencies.
2304        """
2305        manager = self.manager
2306        assert self.ancestors is not None
2307        if suppressed_only:
2308            all_deps = self.suppressed
2309        else:
2310            # Strip out indirect dependencies. See comment in build.load_graph().
2311            dependencies = [dep for dep in self.dependencies
2312                            if self.priorities.get(dep) != PRI_INDIRECT]
2313            all_deps = dependencies + self.suppressed + self.ancestors
2314        for dep in all_deps:
2315            if dep in manager.modules:
2316                continue
2317            options = manager.options.clone_for_module(dep)
2318            if options.ignore_missing_imports:
2319                continue
2320            line = self.dep_line_map.get(dep, 1)
2321            try:
2322                if dep in self.ancestors:
2323                    state, ancestor = None, self  # type: (Optional[State], Optional[State])
2324                else:
2325                    state, ancestor = self, None
2326                # Called just for its side effects of producing diagnostics.
2327                find_module_and_diagnose(
2328                    manager, dep, options,
2329                    caller_state=state, caller_line=line,
2330                    ancestor_for=ancestor)
2331            except (ModuleNotFound, CompileError):
                # Swallow any ModuleNotFound or CompileError exceptions raised while
                # generating a diagnostic. CompileErrors may get generated in
2334                # fine-grained mode when an __init__.py is deleted, if a module
2335                # that was in that package has targets reprocessed before
2336                # it is renamed.
2337                pass
2338
2339    def dependency_priorities(self) -> List[int]:
2340        return [self.priorities.get(dep, PRI_HIGH) for dep in self.dependencies + self.suppressed]
2341
2342    def dependency_lines(self) -> List[int]:
2343        return [self.dep_line_map.get(dep, 1) for dep in self.dependencies + self.suppressed]
2344
2345    def generate_unused_ignore_notes(self) -> None:
2346        if self.options.warn_unused_ignores:
2347            # If this file was initially loaded from the cache, it may have suppressed
2348            # dependencies due to imports with ignores on them. We need to generate
2349            # those errors to avoid spuriously flagging them as unused ignores.
2350            if self.meta:
2351                self.verify_dependencies(suppressed_only=True)
2352            self.manager.errors.generate_unused_ignore_errors(self.xpath)
2353
2354
2355# Module import and diagnostic glue
2356
2357
2358def find_module_and_diagnose(manager: BuildManager,
2359                             id: str,
2360                             options: Options,
2361                             caller_state: 'Optional[State]' = None,
2362                             caller_line: int = 0,
2363                             ancestor_for: 'Optional[State]' = None,
2364                             root_source: bool = False,
2365                             skip_diagnose: bool = False) -> Tuple[str, str]:
2366    """Find a module by name, respecting follow_imports and producing diagnostics.
2367
2368    If the module is not found, then the ModuleNotFound exception is raised.
2369
2370    Args:
2371      id: module to find
2372      options: the options for the module being loaded
2373      caller_state: the state of the importing module, if applicable
2374      caller_line: the line number of the import
2375      ancestor_for: the child module this is an ancestor of, if applicable
2376      root_source: whether this source was specified on the command line
2377      skip_diagnose: skip any error diagnosis and reporting (but ModuleNotFound is
2378          still raised if the module is missing)
2379
2380    The specified value of follow_imports for a module can be overridden
2381    if the module is specified on the command line or if it is a stub,
2382    so we compute and return the "effective" follow_imports of the module.
2383
2384    Returns a tuple containing (file path, target's effective follow_imports setting)
2385    """
2386    file_id = id
2387    if id == 'builtins' and options.python_version[0] == 2:
        # The __builtin__ module is referred to internally by mypy as
        # 'builtins' in Python 2 mode (just like in Python 3), but the stub
        # file is __builtin__.pyi.  The reason is
2391        # that a lot of code hard-codes 'builtins.x' and it's
2392        # easier to work it around like this.  It also means
2393        # that the implementation can mostly ignore the
2394        # difference and just assume 'builtins' everywhere,
2395        # which simplifies code.
2396        file_id = '__builtin__'
2397    result = find_module_with_reason(file_id, manager)
2398    if isinstance(result, str):
2399        # For non-stubs, look at options.follow_imports:
2400        # - normal (default) -> fully analyze
2401        # - silent -> analyze but silence errors
2402        # - skip -> don't analyze, make the type Any
2403        follow_imports = options.follow_imports
2404        if (root_source  # Honor top-level modules
2405                or (not result.endswith('.py')  # Stubs are always normal
2406                    and not options.follow_imports_for_stubs)  # except when they aren't
2407                or id in mypy.semanal_main.core_modules):  # core is always normal
2408            follow_imports = 'normal'
2409        if skip_diagnose:
2410            pass
2411        elif follow_imports == 'silent':
2412            # Still import it, but silence non-blocker errors.
2413            manager.log("Silencing %s (%s)" % (result, id))
2414        elif follow_imports == 'skip' or follow_imports == 'error':
2415            # In 'error' mode, produce special error messages.
2416            if id not in manager.missing_modules:
2417                manager.log("Skipping %s (%s)" % (result, id))
2418            if follow_imports == 'error':
2419                if ancestor_for:
2420                    skipping_ancestor(manager, id, result, ancestor_for)
2421                else:
2422                    skipping_module(manager, caller_line, caller_state,
2423                                    id, result)
2424            raise ModuleNotFound
2425        if not manager.options.no_silence_site_packages:
2426            for dir in manager.search_paths.package_path + manager.search_paths.typeshed_path:
2427                if is_sub_path(result, dir):
2428                    # Silence errors in site-package dirs and typeshed
2429                    follow_imports = 'silent'
2430        if (id in CORE_BUILTIN_MODULES
2431                and not is_typeshed_file(result)
2432                and not is_stub_package_file(result)
2433                and not options.use_builtins_fixtures
2434                and not options.custom_typeshed_dir):
2435            raise CompileError([
2436                'mypy: "%s" shadows library module "%s"' % (os.path.relpath(result), id),
2437                'note: A user-defined top-level module with name "%s" is not supported' % id
2438            ])
2439        return (result, follow_imports)
2440    else:
        # Could not find a module.  Typically the reason is a
        # misspelled module name, a missing stub, a module that isn't
        # in the search path, or a module that hasn't been installed.
2444
2445        ignore_missing_imports = options.ignore_missing_imports
2446        top_level, second_level = get_top_two_prefixes(file_id)
2447        # Don't honor a global (not per-module) ignore_missing_imports
2448        # setting for modules that used to have bundled stubs, as
2449        # otherwise updating mypy can silently result in new false
2450        # negatives. (Unless there are stubs but they are incomplete.)
2451        global_ignore_missing_imports = manager.options.ignore_missing_imports
2452        py_ver = options.python_version[0]
2453        if ((is_legacy_bundled_package(top_level, py_ver)
2454                or is_legacy_bundled_package(second_level, py_ver))
2455                and global_ignore_missing_imports
2456                and not options.ignore_missing_imports_per_module
2457                and result is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED):
2458            ignore_missing_imports = False
2459
2460        if skip_diagnose:
2461            raise ModuleNotFound
2462        if caller_state:
2463            if not (ignore_missing_imports or in_partial_package(id, manager)):
2464                module_not_found(manager, caller_line, caller_state, id, result)
2465            raise ModuleNotFound
2466        elif root_source:
2467            # If we can't find a root source it's always fatal.
2468            # TODO: This might hide non-fatal errors from
2469            # root sources processed earlier.
2470            raise CompileError(["mypy: can't find module '%s'" % id])
2471        else:
2472            raise ModuleNotFound
2473
2474
2475def exist_added_packages(suppressed: List[str],
2476                         manager: BuildManager, options: Options) -> bool:
2477    """Find if there are any newly added packages that were previously suppressed.
2478
2479    Exclude everything not in build for follow-imports=skip.
2480    """
2481    for dep in suppressed:
2482        if dep in manager.source_set.source_modules:
2483            # We don't need to add any special logic for this. If a module
2484            # is added to build, importers will be invalidated by normal mechanism.
2485            continue
2486        path = find_module_simple(dep, manager)
2487        if not path:
2488            continue
2489        if (options.follow_imports == 'skip' and
2490                (not path.endswith('.pyi') or options.follow_imports_for_stubs)):
2491            continue
2492        if '__init__.py' in path:
            # It is better to have a slightly lenient test here: it only mildly
            # reduces performance, while a test that is too strict may affect correctness.
2495            return True
2496    return False
2497
2498
2499def find_module_simple(id: str, manager: BuildManager) -> Optional[str]:
2500    """Find a filesystem path for module `id` or `None` if not found."""
2501    x = find_module_with_reason(id, manager)
2502    if isinstance(x, ModuleNotFoundReason):
2503        return None
2504    return x
2505
2506
2507def find_module_with_reason(id: str, manager: BuildManager) -> ModuleSearchResult:
2508    """Find a filesystem path for module `id` or the reason it can't be found."""
2509    t0 = time.time()
2510    x = manager.find_module_cache.find_module(id)
2511    manager.add_stats(find_module_time=time.time() - t0, find_module_calls=1)
2512    return x
2513
2514
2515def in_partial_package(id: str, manager: BuildManager) -> bool:
2516    """Check if a missing module can potentially be a part of a package.
2517
2518    This checks if there is any existing parent __init__.pyi stub that
2519    defines a module-level __getattr__ (a.k.a. partial stub package).
2520    """
2521    while '.' in id:
2522        parent, _ = id.rsplit('.', 1)
2523        if parent in manager.modules:
2524            parent_mod = manager.modules[parent]  # type: Optional[MypyFile]
2525        else:
            # Parent is not in the build; quickly check whether we can find it.
2527            try:
2528                parent_st = State(id=parent, path=None, source=None, manager=manager,
2529                                  temporary=True)
2530            except (ModuleNotFound, CompileError):
2531                parent_mod = None
2532            else:
2533                parent_mod = parent_st.tree
2534        if parent_mod is not None:
2535            if parent_mod.is_partial_stub_package:
2536                return True
2537            else:
                # Bail out early: a complete (non-partial) parent package was found.
2539                return False
2540        id = parent
2541    return False
2542
2543
2544def module_not_found(manager: BuildManager, line: int, caller_state: State,
2545                     target: str, reason: ModuleNotFoundReason) -> None:
2546    errors = manager.errors
2547    save_import_context = errors.import_context()
2548    errors.set_import_context(caller_state.import_context)
2549    errors.set_file(caller_state.xpath, caller_state.id)
2550    if target == 'builtins':
2551        errors.report(line, 0, "Cannot find 'builtins' module. Typeshed appears broken!",
2552                      blocker=True)
2553        errors.raise_error()
2554    else:
2555        daemon = manager.options.fine_grained_incremental
2556        msg, notes = reason.error_message_templates(daemon)
2557        pyver = '%d.%d' % manager.options.python_version
2558        errors.report(line, 0, msg.format(module=target, pyver=pyver), code=codes.IMPORT)
2559        top_level, second_level = get_top_two_prefixes(target)
2560        if second_level in legacy_bundled_packages:
2561            top_level = second_level
2562        for note in notes:
2563            if '{stub_dist}' in note:
2564                note = note.format(stub_dist=legacy_bundled_packages[top_level].name)
2565            errors.report(line, 0, note, severity='note', only_once=True, code=codes.IMPORT)
2566        if reason is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED:
2567            manager.missing_stub_packages.add(legacy_bundled_packages[top_level].name)
2568    errors.set_import_context(save_import_context)
2569
2570
2571def skipping_module(manager: BuildManager, line: int, caller_state: Optional[State],
2572                    id: str, path: str) -> None:
2573    """Produce an error for an import ignored due to --follow_imports=error"""
2574    assert caller_state, (id, path)
2575    save_import_context = manager.errors.import_context()
2576    manager.errors.set_import_context(caller_state.import_context)
2577    manager.errors.set_file(caller_state.xpath, caller_state.id)
2578    manager.errors.report(line, 0,
2579                          'Import of "%s" ignored' % (id,),
2580                          severity='error')
2581    manager.errors.report(line, 0,
2582                          "(Using --follow-imports=error, module not passed on command line)",
2583                          severity='note', only_once=True)
2584    manager.errors.set_import_context(save_import_context)
2585
2586
2587def skipping_ancestor(manager: BuildManager, id: str, path: str, ancestor_for: 'State') -> None:
2588    """Produce an error for an ancestor ignored due to --follow_imports=error"""
2589    # TODO: Read the path (the __init__.py file) and return
2590    # immediately if it's empty or only contains comments.
2591    # But beware, some package may be the ancestor of many modules,
2592    # so we'd need to cache the decision.
2593    manager.errors.set_import_context([])
2594    manager.errors.set_file(ancestor_for.xpath, ancestor_for.id)
2595    manager.errors.report(-1, -1, 'Ancestor package "%s" ignored' % (id,),
2596                          severity='error', only_once=True)
2597    manager.errors.report(-1, -1,
2598                          "(Using --follow-imports=error, submodule passed on command line)",
2599                          severity='note', only_once=True)
2600
2601
2602def log_configuration(manager: BuildManager, sources: List[BuildSource]) -> None:
2603    """Output useful configuration information to LOG and TRACE"""
2604
2605    manager.log()
2606    configuration_vars = [
2607        ("Mypy Version", __version__),
2608        ("Config File", (manager.options.config_file or "Default")),
2609        ("Configured Executable", manager.options.python_executable or "None"),
2610        ("Current Executable", sys.executable),
2611        ("Cache Dir", manager.options.cache_dir),
2612        ("Compiled", str(not __file__.endswith(".py"))),
2613        ("Exclude", manager.options.exclude),
2614    ]
2615
2616    for conf_name, conf_value in configuration_vars:
2617        manager.log("{:24}{}".format(conf_name + ":", conf_value))
2618
2619    for source in sources:
2620        manager.log("{:24}{}".format("Found source:", source))
2621
    # The complete list of searched paths can get very long, so put it under TRACE
2623    for path_type, paths in manager.search_paths._asdict().items():
2624        if not paths:
2625            manager.trace("No %s" % path_type)
2626            continue
2627
2628        manager.trace("%s:" % path_type)
2629
2630        for pth in paths:
2631            manager.trace("    %s" % pth)
2632
2633
2634# The driver
2635
2636
2637def dispatch(sources: List[BuildSource],
2638             manager: BuildManager,
2639             stdout: TextIO,
2640             ) -> Graph:
2641    log_configuration(manager, sources)
2642
2643    t0 = time.time()
2644    graph = load_graph(sources, manager)
2645
2646    # This is a kind of unfortunate hack to work around some of fine-grained's
2647    # fragility: if we have loaded less than 50% of the specified files from
2648    # cache in fine-grained cache mode, load the graph again honestly.
    # In this case we just turn the cache off entirely, so that we don't need
    # to worry about a mix of modules loaded from source and from the cache,
    # and so that fine-grained mode never *writes* to the cache.
2652    if manager.use_fine_grained_cache() and len(graph) < 0.50 * len(sources):
2653        manager.log("Redoing load_graph without cache because too much was missing")
2654        manager.cache_enabled = False
2655        graph = load_graph(sources, manager)
2656
2657    t1 = time.time()
2658    manager.add_stats(graph_size=len(graph),
2659                      stubs_found=sum(g.path is not None and g.path.endswith('.pyi')
2660                                      for g in graph.values()),
2661                      graph_load_time=(t1 - t0),
2662                      fm_cache_size=len(manager.find_module_cache.results),
2663                      )
2664    if not graph:
2665        print("Nothing to do?!", file=stdout)
2666        return graph
2667    manager.log("Loaded graph with %d nodes (%.3f sec)" % (len(graph), t1 - t0))
2668    if manager.options.dump_graph:
2669        dump_graph(graph, stdout)
2670        return graph
2671
2672    # Fine grained dependencies that didn't have an associated module in the build
2673    # are serialized separately, so we read them after we load the graph.
2674    # We need to read them both for running in daemon mode and if we are generating
2675    # a fine-grained cache (so that we can properly update them incrementally).
    # `read_deps_cache()` also validates the deps cache against the
    # loaded individual cache files.
2678    if manager.options.cache_fine_grained or manager.use_fine_grained_cache():
2679        t2 = time.time()
2680        fg_deps_meta = read_deps_cache(manager, graph)
2681        manager.add_stats(load_fg_deps_time=time.time() - t2)
2682        if fg_deps_meta is not None:
2683            manager.fg_deps_meta = fg_deps_meta
2684        elif manager.stats.get('fresh_metas', 0) > 0:
            # Clear the stats so that we don't recurse forever because fresh_metas stays positive.
2686            manager.stats.clear()
2687            # There were some cache files read, but no fine-grained dependencies loaded.
2688            manager.log("Error reading fine-grained dependencies cache -- aborting cache load")
2689            manager.cache_enabled = False
2690            manager.log("Falling back to full run -- reloading graph...")
2691            return dispatch(sources, manager, stdout)
2692
2693    # If we are loading a fine-grained incremental mode cache, we
2694    # don't want to do a real incremental reprocess of the
2695    # graph---we'll handle it all later.
2696    if not manager.use_fine_grained_cache():
2697        process_graph(graph, manager)
2698        # Update plugins snapshot.
2699        write_plugins_snapshot(manager)
2700        manager.old_plugins_snapshot = manager.plugins_snapshot
2701        if manager.options.cache_fine_grained or manager.options.fine_grained_incremental:
            # If we are running a daemon or are going to write a cache for further fine-grained
            # use, then we need to collect fine-grained protocol dependencies.
            # Since these are a global property of the program, they are calculated after we
            # have processed the whole graph.
2706            TypeState.add_all_protocol_deps(manager.fg_deps)
2707            if not manager.options.fine_grained_incremental:
2708                rdeps = generate_deps_for_cache(manager, graph)
2709                write_deps_cache(rdeps, manager, graph)
2710
2711    if manager.options.dump_deps:
2712        # This speeds up startup a little when not using the daemon mode.
2713        from mypy.server.deps import dump_all_dependencies
2714        dump_all_dependencies(manager.modules, manager.all_types,
2715                              manager.options.python_version, manager.options)
2716    return graph
2717
2718
2719class NodeInfo:
2720    """Some info about a node in the graph of SCCs."""
2721
2722    def __init__(self, index: int, scc: List[str]) -> None:
2723        self.node_id = "n%d" % index
2724        self.scc = scc
2725        self.sizes = {}  # type: Dict[str, int]  # mod -> size in bytes
2726        self.deps = {}  # type: Dict[str, int]  # node_id -> pri
2727
2728    def dumps(self) -> str:
2729        """Convert to JSON string."""
2730        total_size = sum(self.sizes.values())
2731        return "[%s, %s, %s,\n     %s,\n     %s]" % (json.dumps(self.node_id),
2732                                                     json.dumps(total_size),
2733                                                     json.dumps(self.scc),
2734                                                     json.dumps(self.sizes),
2735                                                     json.dumps(self.deps))
2736
2737
2738def dump_graph(graph: Graph, stdout: Optional[TextIO] = None) -> None:
2739    """Dump the graph as a JSON string to stdout.
2740
2741    This copies some of the work by process_graph()
2742    (sorted_components() and order_ascc()).
2743    """
2744    stdout = stdout or sys.stdout
2745    nodes = []
2746    sccs = sorted_components(graph)
2747    for i, ascc in enumerate(sccs):
2748        scc = order_ascc(graph, ascc)
2749        node = NodeInfo(i, scc)
2750        nodes.append(node)
    inv_nodes = {}  # type: Dict[str, str]  # module -> node_id
2752    for node in nodes:
2753        for mod in node.scc:
2754            inv_nodes[mod] = node.node_id
2755    for node in nodes:
2756        for mod in node.scc:
2757            state = graph[mod]
2758            size = 0
2759            if state.path:
2760                try:
2761                    size = os.path.getsize(state.path)
2762                except os.error:
2763                    pass
2764            node.sizes[mod] = size
2765            for dep in state.dependencies:
2766                if dep in state.priorities:
2767                    pri = state.priorities[dep]
2768                    if dep in inv_nodes:
2769                        dep_id = inv_nodes[dep]
2770                        if (dep_id != node.node_id and
2771                                (dep_id not in node.deps or pri < node.deps[dep_id])):
2772                            node.deps[dep_id] = pri
2773    print("[" + ",\n ".join(node.dumps() for node in nodes) + "\n]", file=stdout)
2774
2775
2776def load_graph(sources: List[BuildSource], manager: BuildManager,
2777               old_graph: Optional[Graph] = None,
2778               new_modules: Optional[List[State]] = None) -> Graph:
2779    """Given some source files, load the full dependency graph.
2780
2781    If an old_graph is passed in, it is used as the starting point and
2782    modified during graph loading.
2783
    If new_modules is passed in, any modules that are loaded are
    added to the list. This is an argument and not a return value
    so that the caller can access it even if load_graph fails.

    As this may need to parse files, it can raise CompileError if
    there are syntax errors.
2790    """
2791
2792    graph = old_graph if old_graph is not None else {}  # type: Graph
2793
2794    # The deque is used to implement breadth-first traversal.
2795    # TODO: Consider whether to go depth-first instead.  This may
2796    # affect the order in which we process files within import cycles.
2797    new = new_modules if new_modules is not None else []
2798    entry_points = set()  # type: Set[str]
2799    # Seed the graph with the initial root sources.
2800    for bs in sources:
2801        try:
2802            st = State(id=bs.module, path=bs.path, source=bs.text, manager=manager,
2803                       root_source=True)
2804        except ModuleNotFound:
2805            continue
2806        if st.id in graph:
2807            manager.errors.set_file(st.xpath, st.id)
2808            manager.errors.report(
2809                -1, -1,
2810                'Duplicate module named "%s" (also at "%s")' % (st.id, graph[st.id].xpath),
2811                blocker=True,
2812            )
2813            manager.errors.report(
2814                -1, -1,
2815                "Are you missing an __init__.py? Alternatively, consider using --exclude to "
2816                "avoid checking one of them.",
2817                severity='note'
2818            )
2819
2820            manager.errors.raise_error()
2821        graph[st.id] = st
2822        new.append(st)
2823        entry_points.add(bs.module)
2824
2825    # Note: Running this each time could be slow in the daemon. If it's a problem, we
2826    # can do more work to maintain this incrementally.
2827    seen_files = {st.abspath: st for st in graph.values() if st.path}
2828
2829    # Collect dependencies.  We go breadth-first.
2830    # More nodes might get added to new as we go, but that's fine.
2831    for st in new:
2832        assert st.ancestors is not None
2833        # Strip out indirect dependencies.  These will be dealt with
2834        # when they show up as direct dependencies, and there's a
2835        # scenario where they hurt:
2836        # - Suppose A imports B and B imports C.
2837        # - Suppose on the next round:
2838        #   - C is deleted;
2839        #   - B is updated to remove the dependency on C;
2840        #   - A is unchanged.
2841        # - In this case A's cached *direct* dependencies are still valid
2842        #   (since direct dependencies reflect the imports found in the source)
2843        #   but A's cached *indirect* dependency on C is wrong.
2844        dependencies = [dep for dep in st.dependencies if st.priorities.get(dep) != PRI_INDIRECT]
2845        if not manager.use_fine_grained_cache():
            # TODO: Ideally we could skip modules here that appeared in st.suppressed
            # because they are not in the build with `follow-imports=skip`.
            # This way we could avoid the overhead of cloning options in `State.__init__()`
            # below to get the option value. This is a fairly minor performance loss, however.
2850            added = [dep for dep in st.suppressed if find_module_simple(dep, manager)]
2851        else:
            # During initial loading we don't care about newly added modules;
            # they will be taken care of during the fine-grained update. See
            # also the comment about this in `State.__init__()`.
2855            added = []
2856        for dep in st.ancestors + dependencies + st.suppressed:
2857            ignored = dep in st.suppressed_set and dep not in entry_points
2858            if ignored and dep not in added:
2859                manager.missing_modules.add(dep)
2860            elif dep not in graph:
2861                try:
2862                    if dep in st.ancestors:
                        # TODO: Why not 'if dep not in st.dependencies'?
2864                        # Ancestors don't have import context.
2865                        newst = State(id=dep, path=None, source=None, manager=manager,
2866                                      ancestor_for=st)
2867                    else:
2868                        newst = State(id=dep, path=None, source=None, manager=manager,
2869                                      caller_state=st, caller_line=st.dep_line_map.get(dep, 1))
2870                except ModuleNotFound:
2871                    if dep in st.dependencies_set:
2872                        st.suppress_dependency(dep)
2873                else:
2874                    if newst.path:
2875                        newst_path = os.path.abspath(newst.path)
2876
2877                        if newst_path in seen_files:
2878                            manager.errors.report(
2879                                -1, 0,
2880                                'Source file found twice under different module names: '
2881                                '"{}" and "{}"'.format(seen_files[newst_path].id, newst.id),
2882                                blocker=True)
2883                            manager.errors.raise_error()
2884
2885                        seen_files[newst_path] = newst
2886
2887                    assert newst.id not in graph, newst.id
2888                    graph[newst.id] = newst
2889                    new.append(newst)
2890            if dep in graph and dep in st.suppressed_set:
2891                # Previously suppressed file is now visible
2892                st.add_dependency(dep)
2893    manager.plugin.set_modules(manager.modules)
2894    return graph
2895
2896
2897def process_graph(graph: Graph, manager: BuildManager) -> None:
2898    """Process everything in dependency order."""
2899    sccs = sorted_components(graph)
2900    manager.log("Found %d SCCs; largest has %d nodes" %
2901                (len(sccs), max(len(scc) for scc in sccs)))
2902
2903    fresh_scc_queue = []  # type: List[List[str]]
2904
2905    # We're processing SCCs from leaves (those without further
2906    # dependencies) to roots (those from which everything else can be
2907    # reached).
2908    for ascc in sccs:
2909        # Order the SCC's nodes using a heuristic.
2910        # Note that ascc is a set, and scc is a list.
2911        scc = order_ascc(graph, ascc)
2912        # If builtins is in the list, move it last.  (This is a bit of
2913        # a hack, but it's necessary because the builtins module is
2914        # part of a small cycle involving at least {builtins, abc,
2915        # typing}.  Of these, builtins must be processed last or else
2916        # some builtin objects will be incompletely processed.)
2917        if 'builtins' in ascc:
2918            scc.remove('builtins')
2919            scc.append('builtins')
2920        if manager.options.verbosity >= 2:
2921            for id in scc:
2922                manager.trace("Priorities for %s:" % id,
2923                              " ".join("%s:%d" % (x, graph[id].priorities[x])
2924                                       for x in graph[id].dependencies
2925                                       if x in ascc and x in graph[id].priorities))
2926        # Because the SCCs are presented in topological sort order, we
2927        # don't need to look at dependencies recursively for staleness
2928        # -- the immediate dependencies are sufficient.
2929        stale_scc = {id for id in scc if not graph[id].is_fresh()}
2930        fresh = not stale_scc
2931        deps = set()
2932        for id in scc:
2933            deps.update(graph[id].dependencies)
2934        deps -= ascc
2935        stale_deps = {id for id in deps if id in graph and not graph[id].is_interface_fresh()}
2936        fresh = fresh and not stale_deps
2937        undeps = set()
2938        if fresh:
2939            # Check if any dependencies that were suppressed according
2940            # to the cache have been added back in this run.
2941            # NOTE: Newly suppressed dependencies are handled by is_fresh().
2942            for id in scc:
2943                undeps.update(graph[id].suppressed)
2944            undeps &= graph.keys()
2945            if undeps:
2946                fresh = False
2947        if fresh:
2948            # All cache files are fresh.  Check that no dependency's
2949            # cache file is newer than any scc node's cache file.
2950            oldest_in_scc = min(graph[id].xmeta.data_mtime for id in scc)
2951            viable = {id for id in stale_deps if graph[id].meta is not None}
2952            newest_in_deps = 0 if not viable else max(graph[dep].xmeta.data_mtime
2953                                                      for dep in viable)
2954            if manager.options.verbosity >= 3:  # Dump all mtimes for extreme debugging.
2955                all_ids = sorted(ascc | viable, key=lambda id: graph[id].xmeta.data_mtime)
2956                for id in all_ids:
2957                    if id in scc:
2958                        if graph[id].xmeta.data_mtime < newest_in_deps:
2959                            key = "*id:"
2960                        else:
2961                            key = "id:"
2962                    else:
2963                        if graph[id].xmeta.data_mtime > oldest_in_scc:
2964                            key = "+dep:"
2965                        else:
2966                            key = "dep:"
2967                    manager.trace(" %5s %.0f %s" % (key, graph[id].xmeta.data_mtime, id))
2968            # If equal, give the benefit of the doubt, due to 1-sec time granularity
2969            # (on some platforms).
2970            if oldest_in_scc < newest_in_deps:
2971                fresh = False
2972                fresh_msg = "out of date by %.0f seconds" % (newest_in_deps - oldest_in_scc)
2973            else:
2974                fresh_msg = "fresh"
2975        elif undeps:
2976            fresh_msg = "stale due to changed suppression (%s)" % " ".join(sorted(undeps))
2977        elif stale_scc:
2978            fresh_msg = "inherently stale"
2979            if stale_scc != ascc:
2980                fresh_msg += " (%s)" % " ".join(sorted(stale_scc))
2981            if stale_deps:
2982                fresh_msg += " with stale deps (%s)" % " ".join(sorted(stale_deps))
2983        else:
2984            fresh_msg = "stale due to deps (%s)" % " ".join(sorted(stale_deps))
2985
2986        # Initialize transitive_error for all SCC members from union
2987        # of transitive_error of dependencies.
2988        if any(graph[dep].transitive_error for dep in deps if dep in graph):
2989            for id in scc:
2990                graph[id].transitive_error = True
2991
2992        scc_str = " ".join(scc)
2993        if fresh:
2994            manager.trace("Queuing %s SCC (%s)" % (fresh_msg, scc_str))
2995            fresh_scc_queue.append(scc)
2996        else:
2997            if len(fresh_scc_queue) > 0:
2998                manager.log("Processing {} queued fresh SCCs".format(len(fresh_scc_queue)))
2999                # Defer processing fresh SCCs until we actually run into a stale SCC
3000                # and need the earlier modules to be loaded.
3001                #
3002                # Note that `process_graph` may end with us not having processed every
3003                # single fresh SCC. This is intentional -- we don't need those modules
3004                # loaded if there are no more stale SCCs to be rechecked.
3005                #
3006                # Also note we shouldn't have to worry about transitive_error here,
3007                # since modules with transitive errors aren't written to the cache,
3008                # and if any dependencies were changed, this SCC would be stale.
3009                # (Also, in quick_and_dirty mode we don't care about transitive errors.)
3010                #
3011                # TODO: see if it's possible to determine if we need to process only a
3012                # _subset_ of the past SCCs instead of having to process them all.
3013                for prev_scc in fresh_scc_queue:
3014                    process_fresh_modules(graph, prev_scc, manager)
3015                fresh_scc_queue = []
3016            size = len(scc)
3017            if size == 1:
3018                manager.log("Processing SCC singleton (%s) as %s" % (scc_str, fresh_msg))
3019            else:
3020                manager.log("Processing SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg))
3021            process_stale_scc(graph, scc, manager)
3022
3023    sccs_left = len(fresh_scc_queue)
3024    nodes_left = sum(len(scc) for scc in fresh_scc_queue)
3025    manager.add_stats(sccs_left=sccs_left, nodes_left=nodes_left)
3026    if sccs_left:
3027        manager.log("{} fresh SCCs ({} nodes) left in queue (and will remain unprocessed)"
3028                    .format(sccs_left, nodes_left))
3029        manager.trace(str(fresh_scc_queue))
3030    else:
3031        manager.log("No fresh SCCs left in queue")
3032
3033
3034def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]:
3035    """Come up with the ideal processing order within an SCC.
3036
3037    Using the priorities assigned by all_imported_modules_in_file(),
3038    try to reduce the cycle to a DAG, by omitting arcs representing
3039    dependencies of lower priority.
3040
3041    In the simplest case, if we have A <--> B where A has a top-level
3042    "import B" (medium priority) but B only has the reverse "import A"
3043    inside a function (low priority), we turn the cycle into a DAG by
3044    dropping the B --> A arc, which leaves only A --> B.
3045
3046    If all arcs have the same priority, we fall back to sorting by
3047    reverse global order (the order in which modules were first
3048    encountered).
3049
    The algorithm is recursive, as follows: when arcs of different
    priorities are present, drop all arcs of the lowest priority,
3052    identify SCCs in the resulting graph, and apply the algorithm to
3053    each SCC thus found.  The recursion is bounded because at each
3054    recursion the spread in priorities is (at least) one less.
3055
3056    In practice there are only a few priority levels (less than a
3057    dozen) and in the worst case we just carry out the same algorithm
3058    for finding SCCs N times.  Thus the complexity is no worse than
3059    the complexity of the original SCC-finding algorithm -- see
3060    strongly_connected_components() below for a reference.
3061    """
3062    if len(ascc) == 1:
3063        return [s for s in ascc]
3064    pri_spread = set()
3065    for id in ascc:
3066        state = graph[id]
3067        for dep in state.dependencies:
3068            if dep in ascc:
3069                pri = state.priorities.get(dep, PRI_HIGH)
3070                if pri < pri_max:
3071                    pri_spread.add(pri)
3072    if len(pri_spread) == 1:
3073        # Filtered dependencies are uniform -- order by global order.
3074        return sorted(ascc, key=lambda id: -graph[id].order)
3075    pri_max = max(pri_spread)
3076    sccs = sorted_components(graph, ascc, pri_max)
3077    # The recursion is bounded by the len(pri_spread) check above.
3078    return [s for ss in sccs for s in order_ascc(graph, ss, pri_max)]
3079
3080
3081def process_fresh_modules(graph: Graph, modules: List[str], manager: BuildManager) -> None:
3082    """Process the modules in one group of modules from their cached data.
3083
    This can be used to process an SCC of modules.  This involves
    loading the trees from JSON and then doing various cleanups.
3086    """
3087    t0 = time.time()
3088    for id in modules:
3089        graph[id].load_tree()
3090    t1 = time.time()
3091    for id in modules:
3092        graph[id].fix_cross_refs()
3093    t2 = time.time()
3094    manager.add_stats(process_fresh_time=t2 - t0, load_tree_time=t1 - t0)
3095
3096
3097def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> None:
3098    """Process the modules in one SCC from source code.
3099
3100    Exception: If quick_and_dirty is set, use the cache for fresh modules.
3101    """
3102    stale = scc
3103    for id in stale:
3104        # We may already have parsed the module, or not.
3105        # If the former, parse_file() is a no-op.
3106        graph[id].parse_file()
3107    if 'typing' in scc:
3108        # For historical reasons we need to manually add typing aliases
3109        # for built-in generic collections, see docstring of
3110        # SemanticAnalyzerPass2.add_builtin_aliases for details.
3111        typing_mod = graph['typing'].tree
3112        assert typing_mod, "The typing module was not parsed"
3113    mypy.semanal_main.semantic_analysis_for_scc(graph, scc, manager.errors)
3114
3115    # Track what modules aren't yet done so we can finish them as soon
3116    # as possible, saving memory.
3117    unfinished_modules = set(stale)
3118    for id in stale:
3119        graph[id].type_check_first_pass()
3120        if not graph[id].type_checker().deferred_nodes:
3121            unfinished_modules.discard(id)
3122            graph[id].finish_passes()
3123
3124    while unfinished_modules:
3125        for id in stale:
3126            if id not in unfinished_modules:
3127                continue
3128            if not graph[id].type_check_second_pass():
3129                unfinished_modules.discard(id)
3130                graph[id].finish_passes()
3131    for id in stale:
3132        graph[id].generate_unused_ignore_notes()
3133    if any(manager.errors.is_errors_for_file(graph[id].xpath) for id in stale):
3134        for id in stale:
3135            graph[id].transitive_error = True
3136    for id in stale:
3137        manager.flush_errors(manager.errors.file_messages(graph[id].xpath), False)
3138        graph[id].write_cache()
3139        graph[id].mark_as_rechecked()
3140
3141
3142def sorted_components(graph: Graph,
3143                      vertices: Optional[AbstractSet[str]] = None,
3144                      pri_max: int = PRI_ALL) -> List[AbstractSet[str]]:
3145    """Return the graph's SCCs, topologically sorted by dependencies.
3146
3147    The sort order is from leaves (nodes without dependencies) to
3148    roots (nodes on which no other nodes depend).
3149
    This also works for a subset of the full dependency graph:
    dependencies that aren't among the given vertices are ignored.
3152    """
3153    # Compute SCCs.
3154    if vertices is None:
3155        vertices = set(graph)
3156    edges = {id: deps_filtered(graph, vertices, id, pri_max) for id in vertices}
3157    sccs = list(strongly_connected_components(vertices, edges))
3158    # Topsort.
3159    sccsmap = {id: frozenset(scc) for scc in sccs for id in scc}
3160    data = {}  # type: Dict[AbstractSet[str], Set[AbstractSet[str]]]
3161    for scc in sccs:
3162        deps = set()  # type: Set[AbstractSet[str]]
3163        for id in scc:
3164            deps.update(sccsmap[x] for x in deps_filtered(graph, vertices, id, pri_max))
3165        data[frozenset(scc)] = deps
3166    res = []
3167    for ready in topsort(data):
3168        # Sort the sets in ready by reversed smallest State.order.  Examples:
3169        #
3170        # - If ready is [{x}, {y}], x.order == 1, y.order == 2, we get
3171        #   [{y}, {x}].
3172        #
3173        # - If ready is [{a, b}, {c, d}], a.order == 1, b.order == 3,
3174        #   c.order == 2, d.order == 4, the sort keys become [1, 2]
3175        #   and the result is [{c, d}, {a, b}].
3176        res.extend(sorted(ready,
3177                          key=lambda scc: -min(graph[id].order for id in scc)))
3178    return res
3179
3180
3181def deps_filtered(graph: Graph, vertices: AbstractSet[str], id: str, pri_max: int) -> List[str]:
3182    """Filter dependencies for id with pri < pri_max."""
3183    if id not in vertices:
3184        return []
3185    state = graph[id]
3186    return [dep
3187            for dep in state.dependencies
3188            if dep in vertices and state.priorities.get(dep, PRI_HIGH) < pri_max]
3189
3190
3191def strongly_connected_components(vertices: AbstractSet[str],
3192                                  edges: Dict[str, List[str]]) -> Iterator[Set[str]]:
3193    """Compute Strongly Connected Components of a directed graph.
3194
3195    Args:
3196      vertices: the labels for the vertices
3197      edges: for each vertex, gives the target vertices of its outgoing edges
3198
3199    Returns:
3200      An iterator yielding strongly connected components, each
      represented as a set of vertices.  Each input vertex will occur
      exactly once; vertices that aren't part of a larger SCC are
      returned as singleton sets.
3204
3205    From http://code.activestate.com/recipes/578507/.
3206    """
3207    identified = set()  # type: Set[str]
3208    stack = []  # type: List[str]
3209    index = {}  # type: Dict[str, int]
3210    boundaries = []  # type: List[int]
3211
3212    def dfs(v: str) -> Iterator[Set[str]]:
3213        index[v] = len(stack)
3214        stack.append(v)
3215        boundaries.append(index[v])
3216
3217        for w in edges[v]:
3218            if w not in index:
3219                yield from dfs(w)
3220            elif w not in identified:
3221                while index[w] < boundaries[-1]:
3222                    boundaries.pop()
3223
3224        if boundaries[-1] == index[v]:
3225            boundaries.pop()
3226            scc = set(stack[index[v]:])
3227            del stack[index[v]:]
3228            identified.update(scc)
3229            yield scc
3230
3231    for v in vertices:
3232        if v not in index:
3233            yield from dfs(v)
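

# A tiny, self-contained example of the SCC computation above.  This is an
# illustrative sketch only and is not called anywhere in the build; the vertex
# names 'a', 'b' and 'c' are made up.
def _strongly_connected_components_example() -> None:
    # 'a' and 'b' form a two-element cycle; 'c' is only depended upon, so it
    # comes back as a singleton SCC.
    vertices = {'a', 'b', 'c'}
    edges = {'a': ['b', 'c'], 'b': ['a'], 'c': []}  # type: Dict[str, List[str]]
    sccs = list(strongly_connected_components(vertices, edges))
    assert {'a', 'b'} in sccs and {'c'} in sccs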
3234
3235
3236def topsort(data: Dict[AbstractSet[str],
3237                       Set[AbstractSet[str]]]) -> Iterable[Set[AbstractSet[str]]]:
3238    """Topological sort.
3239
3240    Args:
      data: A map from SCCs (represented as frozen sets of strings) to
            sets of SCCs, their dependencies.  NOTE: This data structure
3243            is modified in place -- for normalization purposes,
3244            self-dependencies are removed and entries representing
3245            orphans are added.
3246
3247    Returns:
3248      An iterator yielding sets of SCCs that have an equivalent
3249      ordering.  NOTE: The algorithm doesn't care about the internal
3250      structure of SCCs.
3251
3252    Example:
3253      Suppose the input has the following structure:
3254
3255        {A: {B, C}, B: {D}, C: {D}}
3256
3257      This is normalized to:
3258
3259        {A: {B, C}, B: {D}, C: {D}, D: {}}
3260
3261      The algorithm will yield the following values:
3262
3263        {D}
3264        {B, C}
3265        {A}
3266
3267    From http://code.activestate.com/recipes/577413/.
3268    """
3269    # TODO: Use a faster algorithm?
3270    for k, v in data.items():
3271        v.discard(k)  # Ignore self dependencies.
3272    for item in set.union(*data.values()) - set(data.keys()):
3273        data[item] = set()
3274    while True:
3275        ready = {item for item, dep in data.items() if not dep}
3276        if not ready:
3277            break
3278        yield ready
3279        data = {item: (dep - ready)
3280                for item, dep in data.items()
3281                if item not in ready}
3282    assert not data, "A cyclic dependency exists amongst %r" % data
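

# A tiny, self-contained example mirroring the topsort() docstring above.
# This is an illustrative sketch only and is not called anywhere in the build;
# the single-letter SCC names are made up.
def _topsort_example() -> None:
    a = frozenset({'A'})
    b = frozenset({'B'})
    c = frozenset({'C'})
    d = frozenset({'D'})
    # A depends on B and C, which both depend on D; the orphan entry for D is
    # added by topsort() itself during normalization.
    data = {a: {b, c}, b: {d}, c: {d}}  # type: Dict[AbstractSet[str], Set[AbstractSet[str]]]
    assert list(topsort(data)) == [{d}, {b, c}, {a}]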
3283
3284
3285def missing_stubs_file(cache_dir: str) -> str:
3286    return os.path.join(cache_dir, 'missing_stubs')
3287
3288
3289def record_missing_stub_packages(cache_dir: str, missing_stub_packages: Set[str]) -> None:
3290    """Write a file containing missing stub packages.
3291
3292    This allows a subsequent "mypy --install-types" run (without other arguments)
3293    to install missing stub packages.
3294    """
3295    fnam = missing_stubs_file(cache_dir)
3296    if missing_stub_packages:
3297        with open(fnam, 'w') as f:
3298            for pkg in sorted(missing_stub_packages):
3299                f.write('%s\n' % pkg)
3300    else:
3301        if os.path.isfile(fnam):
3302            os.remove(fnam)
3303