1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5# This module contains code for managing WebIDL files and bindings for
6# the build system.
7
8from __future__ import print_function, unicode_literals
9
10import errno
11import hashlib
12import io
13import json
14import logging
15import os
16import six
17
18from copy import deepcopy
19
20from mach.mixin.logging import LoggingMixin
21
22from mozbuild.makeutil import Makefile
23from mozbuild.pythonutil import iter_modules_in_path
24from mozbuild.util import FileAvoidWrite
25
26import mozpack.path as mozpath
27import buildconfig
28
29# There are various imports in this file in functions to avoid adding
30# dependencies to config.status. See bug 949875.
31
32
class BuildResult(object):
    """Summary of output-file activity from one WebIDL processing run.

    Records which .webidl inputs were (re)processed and sorts every output
    file into exactly one of three buckets: created, updated or unchanged.
    """

    def __init__(self):
        # Input .webidl files whose outputs were regenerated this run.
        self.inputs = set()

        # Output files written for the first time.
        self.created = set()

        # Output files that existed before but whose content changed.
        self.updated = set()

        # Output files whose content was already up to date.
        self.unchanged = set()
51
52
class WebIDLCodegenManagerState(dict):
    """Holds state for the WebIDL code generation manager.

    State is currently just an extended dict. The internal implementation of
    state should be considered a black box to everyone except
    WebIDLCodegenManager. But we'll still document it.

    Fields:

    version
       The integer version of the format. This is to detect incompatible
       changes between state. It should be bumped whenever the format
       changes or semantics change.

    webidls
       A dictionary holding information about every known WebIDL input.
       Keys are the basenames of input WebIDL files. Values are dicts of
       metadata. Keys in those dicts are:

       * filename - The full path to the input filename.
       * inputs - A set of full paths to other webidl files this webidl
         depends on.
       * outputs - Set of full output paths that are created/derived from
         this file.
       * sha1 - The hexadecimal SHA-1 of the input filename from the last
         processing time.

    global_depends
       A dictionary defining files that influence all processing. Keys
       are full filenames. Values are hexadecimal SHA-1 from the last
       processing time.

    dictionaries_convertible_to_js
       A set of names of dictionaries that are convertible to JS.

    dictionaries_convertible_from_js
       A set of names of dictionaries that are convertible from JS.
    """

    VERSION = 3

    def __init__(self, fh=None):
        """Initialize state, optionally deserializing it from ``fh``.

        ``fh`` is a file handle containing JSON previously produced by
        dump(). Raises Exception if the serialized version doesn't match
        VERSION.
        """
        self["version"] = self.VERSION
        self["webidls"] = {}
        self["global_depends"] = {}
        # Initialize to empty sets so dump() can't KeyError on a fresh
        # state that was never populated by the manager. Both entries are
        # overwritten below when deserializing.
        self["dictionaries_convertible_to_js"] = set()
        self["dictionaries_convertible_from_js"] = set()

        if not fh:
            return

        state = json.load(fh)
        if state["version"] != self.VERSION:
            raise Exception("Unknown state version: %s" % state["version"])

        self["version"] = state["version"]
        self["global_depends"] = state["global_depends"]

        for k, v in state["webidls"].items():
            self["webidls"][k] = v

            # Sets are converted to lists for serialization because JSON
            # doesn't support sets.
            self["webidls"][k]["inputs"] = set(v["inputs"])
            self["webidls"][k]["outputs"] = set(v["outputs"])

        self["dictionaries_convertible_to_js"] = set(
            state["dictionaries_convertible_to_js"]
        )

        self["dictionaries_convertible_from_js"] = set(
            state["dictionaries_convertible_from_js"]
        )

    def dump(self, fh):
        """Dump serialized state to a file handle."""
        normalized = deepcopy(self)

        for k, v in self["webidls"].items():
            # Convert sets to lists because JSON doesn't support sets.
            normalized["webidls"][k]["outputs"] = sorted(v["outputs"])
            normalized["webidls"][k]["inputs"] = sorted(v["inputs"])

        normalized["dictionaries_convertible_to_js"] = sorted(
            self["dictionaries_convertible_to_js"]
        )

        normalized["dictionaries_convertible_from_js"] = sorted(
            self["dictionaries_convertible_from_js"]
        )

        json.dump(normalized, fh, sort_keys=True)
143
144
class WebIDLCodegenManager(LoggingMixin):
    """Manages all code generation around WebIDL.

    To facilitate testing, this object is meant to be generic and reusable.
    Paths, etc should be parameters and not hardcoded.
    """

    # Global parser derived declaration files.
    GLOBAL_DECLARE_FILES = {
        "BindingNames.h",
        "GeneratedAtomList.h",
        "GeneratedEventList.h",
        "PrototypeList.h",
        "RegisterBindings.h",
        "RegisterWorkerBindings.h",
        "RegisterWorkerDebuggerBindings.h",
        "RegisterWorkletBindings.h",
        "UnionConversions.h",
        "UnionTypes.h",
        "WebIDLPrefs.h",
        "WebIDLSerializable.h",
    }

    # Global parser derived definition files.
    GLOBAL_DEFINE_FILES = {
        "BindingNames.cpp",
        "RegisterBindings.cpp",
        "RegisterWorkerBindings.cpp",
        "RegisterWorkerDebuggerBindings.cpp",
        "RegisterWorkletBindings.cpp",
        "UnionTypes.cpp",
        "PrototypeList.cpp",
        "WebIDLPrefs.cpp",
        "WebIDLSerializable.cpp",
    }

    def __init__(
        self,
        config_path,
        webidl_root,
        inputs,
        exported_header_dir,
        codegen_dir,
        state_path,
        cache_dir=None,
        make_deps_path=None,
        make_deps_target=None,
        use_builtin_readable_stream=True,
    ):
        """Create an instance that manages WebIDLs in the build system.

        config_path refers to a WebIDL config file (e.g. Bindings.conf).
        inputs is a 4-tuple describing the input .webidl files and how to
        process them. Members are:
            (set(.webidl files), set(basenames of exported files),
                set(basenames of generated events files),
                set(example interface names))

        exported_header_dir and codegen_dir are directories where generated
        files will be written to.
        state_path is the path to a file that will receive JSON state from our
        actions.
        make_deps_path is the path to a make dependency file that we can
        optionally write.
        make_deps_target is the target that receives the make dependencies. It
        must be defined if using make_deps_path.
        """
        self.populate_logger()

        input_paths, exported_stems, generated_events_stems, example_interfaces = inputs

        self._config_path = config_path
        self._webidl_root = webidl_root
        self._input_paths = set(input_paths)
        self._exported_stems = set(exported_stems)
        self._generated_events_stems = set(generated_events_stems)
        # Configuration wants the original (ordered) sequence, not a set.
        self._generated_events_stems_as_array = generated_events_stems
        self._example_interfaces = set(example_interfaces)
        self._exported_header_dir = exported_header_dir
        self._codegen_dir = codegen_dir
        self._state_path = state_path
        self._cache_dir = cache_dir
        self._make_deps_path = make_deps_path
        self._make_deps_target = make_deps_target
        self._use_builtin_readable_stream = use_builtin_readable_stream

        if (make_deps_path and not make_deps_target) or (
            not make_deps_path and make_deps_target
        ):
            raise Exception(
                "Must define both make_deps_path and make_deps_target "
                "if one is defined."
            )

        self._parser_results = None
        self._config = None
        self._state = WebIDLCodegenManagerState()

        if os.path.exists(state_path):
            with io.open(state_path, "r") as fh:
                try:
                    self._state = WebIDLCodegenManagerState(fh=fh)
                except Exception as e:
                    # Bad/incompatible state isn't fatal: we just regenerate
                    # from scratch as if this were a fresh objdir.
                    self.log(
                        logging.WARN,
                        "webidl_bad_state",
                        {"msg": str(e)},
                        "Bad WebIDL state: {msg}",
                    )

    @property
    def config(self):
        """The parsed WebIDL Configuration, parsing lazily on first access."""
        if not self._config:
            self._parse_webidl()

        return self._config

    def generate_build_files(self):
        """Generate files required for the build.

        This function is in charge of generating all the .h/.cpp files derived
        from input .webidl files. Please note that there are build actions
        required to produce .webidl files and these build actions are
        explicitly not captured here: this function assumes all .webidl files
        are present and up to date.

        This routine is called as part of the build to ensure files that need
        to exist are present and up to date. This routine may not be called if
        the build dependencies (generated as a result of calling this the first
        time) say everything is up to date.

        Because reprocessing outputs for every .webidl on every invocation
        is expensive, we only regenerate the minimal set of files on every
        invocation. The rules for deciding what needs done are roughly as
        follows:

        1. If any .webidl changes, reparse all .webidl files and regenerate
           the global derived files. Only regenerate output files (.h/.cpp)
           impacted by the modified .webidl files.
        2. If an non-.webidl dependency (Python files, config file) changes,
           assume everything is out of date and regenerate the world. This
           is because changes in those could globally impact every output
           file.
        3. If an output file is missing, ensure it is present by performing
           necessary regeneration.

        Returns a BuildResult summarizing what was written.
        """
        # Despite #1 above, we assume the build system is smart enough to not
        # invoke us if nothing has changed. Therefore, any invocation means
        # something has changed. And, if anything has changed, we need to
        # parse the WebIDL.
        self._parse_webidl()

        result = BuildResult()

        # If we parse, we always update globals - they are cheap and it is
        # easier that way.
        created, updated, unchanged = self._write_global_derived()
        result.created |= created
        result.updated |= updated
        result.unchanged |= unchanged

        # If any of the extra dependencies changed, regenerate the world.
        global_changed, global_hashes = self._global_dependencies_changed()
        if global_changed:
            # Make a copy because we may modify.
            changed_inputs = set(self._input_paths)
        else:
            changed_inputs = self._compute_changed_inputs()

        self._state["global_depends"] = global_hashes
        self._state["dictionaries_convertible_to_js"] = set(
            d.identifier.name for d in self._config.getDictionariesConvertibleToJS()
        )
        self._state["dictionaries_convertible_from_js"] = set(
            d.identifier.name for d in self._config.getDictionariesConvertibleFromJS()
        )

        # Generate bindings from .webidl files.
        for filename in sorted(changed_inputs):
            basename = mozpath.basename(filename)
            result.inputs.add(filename)
            written, deps = self._generate_build_files_for_webidl(filename)
            result.created |= written[0]
            result.updated |= written[1]
            result.unchanged |= written[2]

            # _input_hashes was populated by _parse_webidl() above.
            self._state["webidls"][basename] = dict(
                filename=filename,
                outputs=written[0] | written[1] | written[2],
                inputs=set(deps),
                sha1=self._input_hashes[filename],
            )

        # Process some special interfaces required for testing.
        for interface in self._example_interfaces:
            written = self.generate_example_files(interface)
            result.created |= written[0]
            result.updated |= written[1]
            result.unchanged |= written[2]

        # Generate a make dependency file.
        if self._make_deps_path:
            mk = Makefile()
            codegen_rule = mk.create_rule([self._make_deps_target])
            codegen_rule.add_dependencies(
                six.ensure_text(s) for s in global_hashes.keys()
            )
            codegen_rule.add_dependencies(six.ensure_text(p) for p in self._input_paths)

            with FileAvoidWrite(self._make_deps_path) as fh:
                mk.dump(fh)

        self._save_state()

        return result

    def generate_example_files(self, interface):
        """Generates example files for a given interface."""
        from Codegen import CGExampleRoot

        root = CGExampleRoot(self.config, interface)

        example_paths = self._example_paths(interface)
        for path in example_paths:
            print("Generating {}".format(path))

        return self._maybe_write_codegen(root, *example_paths)

    def _parse_webidl(self):
        """Parse all input .webidl files, populating parser results, the
        Configuration, and the per-input SHA-1 hashes."""
        import WebIDL
        from Configuration import Configuration

        self.log(
            logging.INFO,
            "webidl_parse",
            {"count": len(self._input_paths)},
            "Parsing {count} WebIDL files.",
        )

        hashes = {}
        parser = WebIDL.Parser(
            self._cache_dir,
            lexer=None,
            use_builtin_readable_stream=self._use_builtin_readable_stream,
        )

        for path in sorted(self._input_paths):
            with io.open(path, "r", encoding="utf-8") as fh:
                data = fh.read()
                hashes[path] = hashlib.sha1(six.ensure_binary(data)).hexdigest()
                parser.parse(data, path)

        # Only these directories may contain WebIDL files with interfaces
        # which are exposed to the web. WebIDL files in these roots may not
        # be changed without DOM peer review.
        #
        # Other directories may contain WebIDL files as long as they only
        # contain ChromeOnly interfaces. These are not subject to mandatory
        # DOM peer review.
        web_roots = (
            # The main WebIDL root.
            self._webidl_root,
            # The binding config root, which contains some test-only
            # interfaces.
            os.path.dirname(self._config_path),
            # The objdir sub-directory which contains generated WebIDL files.
            self._codegen_dir,
        )

        self._parser_results = parser.finish()
        self._config = Configuration(
            self._config_path,
            web_roots,
            self._parser_results,
            self._generated_events_stems_as_array,
        )
        self._input_hashes = hashes

    def _write_global_derived(self):
        """Write the global declare/define files derived from the full parse.

        Returns a (created, updated, unchanged) tuple of path sets.
        """
        from Codegen import GlobalGenRoots

        things = [("declare", f) for f in self.GLOBAL_DECLARE_FILES]
        things.extend(("define", f) for f in self.GLOBAL_DEFINE_FILES)

        result = (set(), set(), set())

        for what, filename in things:
            # Each global file's stem names its generator on GlobalGenRoots.
            stem = mozpath.splitext(filename)[0]
            root = getattr(GlobalGenRoots, stem)(self._config)

            if what == "declare":
                code = root.declare()
                output_root = self._exported_header_dir
            elif what == "define":
                code = root.define()
                output_root = self._codegen_dir
            else:
                raise Exception("Unknown global gen type: %s" % what)

            output_path = mozpath.join(output_root, filename)
            self._maybe_write_file(output_path, code, result)

        return result

    def _compute_changed_inputs(self):
        """Compute the set of input files that need to be regenerated."""
        changed_inputs = set()
        expected_outputs = self.expected_build_output_files()

        # Look for missing output files.
        if any(not os.path.exists(f) for f in expected_outputs):
            # FUTURE Bug 940469 Only regenerate minimum set.
            changed_inputs |= self._input_paths

        # That's it for examining output files. We /could/ examine SHA-1's of
        # output files from a previous run to detect modifications. But that's
        # a lot of extra work and most build systems don't do that anyway.

        # Now we move on to the input files.
        old_hashes = {v["filename"]: v["sha1"] for v in self._state["webidls"].values()}

        old_filenames = set(old_hashes.keys())
        new_filenames = self._input_paths

        # If an old file has disappeared or a new file has arrived, mark
        # it.
        changed_inputs |= old_filenames ^ new_filenames

        # For the files in common between runs, compare content. If the file
        # has changed, mark it. We don't need to perform mtime comparisons
        # because content is a stronger validator.
        for filename in old_filenames & new_filenames:
            if old_hashes[filename] != self._input_hashes[filename]:
                changed_inputs.add(filename)

        # We've now populated the base set of inputs that have changed.

        # Inherit dependencies from previous run. The full set of dependencies
        # is associated with each record, so we don't need to perform any fancy
        # graph traversal.
        for v in self._state["webidls"].values():
            if any(dep for dep in v["inputs"] if dep in changed_inputs):
                changed_inputs.add(v["filename"])

        # Now check for changes to the set of dictionaries that are convertible to JS
        oldDictionariesConvertibleToJS = self._state["dictionaries_convertible_to_js"]
        newDictionariesConvertibleToJS = self._config.getDictionariesConvertibleToJS()
        newNames = set(d.identifier.name for d in newDictionariesConvertibleToJS)
        changedDictionaryNames = oldDictionariesConvertibleToJS ^ newNames

        # Now check for changes to the set of dictionaries that are convertible from JS
        oldDictionariesConvertibleFromJS = self._state[
            "dictionaries_convertible_from_js"
        ]
        newDictionariesConvertibleFromJS = (
            self._config.getDictionariesConvertibleFromJS()
        )
        newNames = set(d.identifier.name for d in newDictionariesConvertibleFromJS)
        changedDictionaryNames |= oldDictionariesConvertibleFromJS ^ newNames

        for name in changedDictionaryNames:
            d = self._config.getDictionaryIfExists(name)
            if d:
                changed_inputs.add(d.filename())

        # Only use paths that are known to our current state.
        # This filters out files that were deleted or changed type (e.g. from
        # static to preprocessed).
        return changed_inputs & self._input_paths

    def _binding_info(self, p):
        """Compute binding metadata for an input path.

        Returns a tuple of:

          (stem, binding_stem, is_event, header_dir, output_files)

        output_files is itself a tuple. The first two items are the binding
        header and C++ paths, respectively. The 2nd pair are the event header
        and C++ paths or None if this isn't an event binding.
        """
        basename = mozpath.basename(p)
        stem = mozpath.splitext(basename)[0]
        binding_stem = "%sBinding" % stem

        # Exported stems have their headers placed in the exported header
        # directory; everything else stays in the codegen directory.
        if stem in self._exported_stems:
            header_dir = self._exported_header_dir
        else:
            header_dir = self._codegen_dir

        is_event = stem in self._generated_events_stems

        files = (
            mozpath.join(header_dir, "%s.h" % binding_stem),
            mozpath.join(self._codegen_dir, "%s.cpp" % binding_stem),
            mozpath.join(header_dir, "%s.h" % stem) if is_event else None,
            mozpath.join(self._codegen_dir, "%s.cpp" % stem) if is_event else None,
        )

        return stem, binding_stem, is_event, header_dir, files

    def _example_paths(self, interface):
        """Return the (header, cpp) output paths for an example interface."""
        return (
            mozpath.join(self._codegen_dir, "%s-example.h" % interface),
            mozpath.join(self._codegen_dir, "%s-example.cpp" % interface),
        )

    def expected_build_output_files(self):
        """Obtain the set of files generate_build_files() should write."""
        paths = set()

        # Account for global generation.
        for p in self.GLOBAL_DECLARE_FILES:
            paths.add(mozpath.join(self._exported_header_dir, p))
        for p in self.GLOBAL_DEFINE_FILES:
            paths.add(mozpath.join(self._codegen_dir, p))

        for p in self._input_paths:
            stem, binding_stem, is_event, header_dir, files = self._binding_info(p)
            paths |= {f for f in files if f}

        for interface in self._example_interfaces:
            for p in self._example_paths(interface):
                paths.add(p)

        return paths

    def _generate_build_files_for_webidl(self, filename):
        """Generate binding (and, for events, event) outputs for one .webidl.

        Returns ((created, updated, unchanged), deps) where deps is the set
        of files this .webidl's outputs depend on.
        """
        from Codegen import (
            CGBindingRoot,
            CGEventRoot,
        )

        self.log(
            logging.INFO,
            "webidl_generate_build_for_input",
            {"filename": filename},
            # Interpolate the filename argument so the log is useful.
            "Generating WebIDL files derived from {filename}",
        )

        stem, binding_stem, is_event, header_dir, files = self._binding_info(filename)
        root = CGBindingRoot(self._config, binding_stem, filename)

        result = self._maybe_write_codegen(root, files[0], files[1])

        if is_event:
            generated_event = CGEventRoot(self._config, stem)
            result = self._maybe_write_codegen(
                generated_event, files[2], files[3], result
            )

        return result, root.deps()

    def _global_dependencies_changed(self):
        """Determine whether the global dependencies have changed.

        Returns (changed, current_hashes) where current_hashes maps each
        global dependency file to its SHA-1.
        """
        current_files = set(iter_modules_in_path(mozpath.dirname(__file__)))

        # We need to catch other .py files from /dom/bindings. We assume these
        # are in the same directory as the config file.
        current_files |= set(iter_modules_in_path(mozpath.dirname(self._config_path)))

        current_files.add(self._config_path)

        current_hashes = {}
        for f in current_files:
            # This will fail if the file doesn't exist. If a current global
            # dependency doesn't exist, something else is wrong.
            with io.open(f, "rb") as fh:
                current_hashes[f] = hashlib.sha1(fh.read()).hexdigest()

        # The set of files has changed.
        if current_files ^ set(self._state["global_depends"].keys()):
            return True, current_hashes

        # Compare hashes.
        for f, sha1 in current_hashes.items():
            if sha1 != self._state["global_depends"][f]:
                return True, current_hashes

        return False, current_hashes

    def _save_state(self):
        """Serialize our state to the configured state file."""
        with io.open(self._state_path, "w", newline="\n") as fh:
            self._state.dump(fh)

    def _maybe_write_codegen(self, obj, declare_path, define_path, result=None):
        """Write a codegen root's declare/define outputs, accumulating into
        result, a (created, updated, unchanged) tuple of path sets."""
        assert declare_path and define_path
        if not result:
            result = (set(), set(), set())

        self._maybe_write_file(declare_path, obj.declare(), result)
        self._maybe_write_file(define_path, obj.define(), result)

        return result

    def _maybe_write_file(self, path, content, result):
        """Write content to path only if it changed, recording the outcome in
        result as created (new), updated (changed) or unchanged."""
        fh = FileAvoidWrite(path)
        fh.write(content)
        existed, updated = fh.close()

        if not existed:
            result[0].add(path)
        elif updated:
            result[1].add(path)
        else:
            result[2].add(path)
651
652
def create_build_system_manager(
    topsrcdir=None, topobjdir=None, dist_dir=None, use_builtin_readable_stream=None
):
    """Create a WebIDLCodegenManager for use by the build system."""
    if topsrcdir is None:
        # Either all path arguments are given explicitly or none are; in the
        # latter case everything comes from buildconfig.
        assert (
            topobjdir is None
            and dist_dir is None
            and use_builtin_readable_stream is None
        )
        import buildconfig

        topsrcdir = buildconfig.topsrcdir
        topobjdir = buildconfig.topobjdir
        dist_dir = buildconfig.substs["DIST"]
        use_builtin_readable_stream = not buildconfig.substs.get("MOZ_DOM_STREAMS")

    bindings_srcdir = os.path.join(topsrcdir, "dom", "bindings")
    bindings_objdir = os.path.join(topobjdir, "dom", "bindings")
    webidl_root = os.path.join(topsrcdir, "dom", "webidl")

    # The build frontend records the input file lists for us in the objdir.
    with io.open(os.path.join(bindings_objdir, "file-lists.json"), "r") as fh:
        file_lists = json.load(fh)

    inputs = tuple(
        file_lists[key]
        for key in (
            "webidls",
            "exported_stems",
            "generated_events_stems",
            "example_interfaces",
        )
    )

    # Ensure the parser cache directory exists (tolerate races/reruns).
    cache_dir = os.path.join(bindings_objdir, "_cache")
    try:
        os.makedirs(cache_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    return WebIDLCodegenManager(
        os.path.join(bindings_srcdir, "Bindings.conf"),
        webidl_root,
        inputs,
        os.path.join(dist_dir, "include", "mozilla", "dom"),
        bindings_objdir,
        os.path.join(bindings_objdir, "codegen.json"),
        cache_dir=cache_dir,
        # The make rules include a codegen.pp file containing dependencies.
        make_deps_path=os.path.join(bindings_objdir, "codegen.pp"),
        make_deps_target="webidl.stub",
        use_builtin_readable_stream=use_builtin_readable_stream,
    )
704