1#!/usr/bin/env python3
2# Copyright 2020 The gRPC Authors
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16# Script to extract build metadata from bazel BUILD.
17# To avoid having two sources of truth for the build metadata (build
18# targets, source files, header files etc.), this script analyzes the contents
19# of bazel BUILD files and generates a YAML file (currently called
# build_autogenerated.yaml). The format and semantics of the generated YAML files
# are chosen to match the format of a "build.yaml" file, which used
# to be the source of truth for gRPC build before bazel became
# the primary build system.
24# A good basic overview of the "build.yaml" format is available here:
25# https://github.com/grpc/grpc/blob/master/templates/README.md. Note that
26# while useful as an overview, the doc does not act as formal spec
27# (formal spec does not exist in fact) and the doc can be incomplete,
28# inaccurate or slightly out of date.
29# TODO(jtattermusch): In the future we want to get rid of the legacy build.yaml
30# format entirely or simplify it to a point where it becomes self-explanatory
31# and doesn't need any detailed documentation.
32
33import collections
34import os
35import re
36import subprocess
37import sys
38from typing import Any, Dict, Iterable, List, Optional
39import xml.etree.ElementTree as ET
40
41import build_cleaner
42import yaml
43
# Type aliases for the intermediate data structures passed between the
# helpers in this script.
BuildMetadata = Dict[str, Any]  # metadata describing a single build rule/target
BuildDict = Dict[str, BuildMetadata]  # maps rule/target name -> its metadata
BuildYaml = Dict[str, Any]  # the top-level "build.yaml"-like structure
47
48
def _bazel_query_xml_tree(query: str) -> ET.Element:
    """Runs a bazel query and returns its XML output parsed as an element tree."""
    # --noimplicit_deps keeps the output limited to deps listed in BUILD files
    command = [
        'tools/bazel', 'query', '--noimplicit_deps', '--output', 'xml', query
    ]
    xml_output = subprocess.check_output(command)
    return ET.fromstring(xml_output)
54
55
56def _rule_dict_from_xml_node(rule_xml_node):
57    """Converts XML node representing a rule (obtained from "bazel query --output xml") to a dictionary that contains all the metadata we will need."""
58    result = {
59        'class': rule_xml_node.attrib.get('class'),
60        'name': rule_xml_node.attrib.get('name'),
61        'srcs': [],
62        'hdrs': [],
63        'deps': [],
64        'data': [],
65        'tags': [],
66        'args': [],
67        'generator_function': None,
68        'size': None,
69        'flaky': False,
70    }
71    for child in rule_xml_node:
72        # all the metadata we want is stored under "list" tags
73        if child.tag == 'list':
74            list_name = child.attrib['name']
75            if list_name in ['srcs', 'hdrs', 'deps', 'data', 'tags', 'args']:
76                result[list_name] += [item.attrib['value'] for item in child]
77        if child.tag == 'string':
78            string_name = child.attrib['name']
79            if string_name in ['generator_function', 'size']:
80                result[string_name] = child.attrib['value']
81        if child.tag == 'boolean':
82            bool_name = child.attrib['name']
83            if bool_name in ['flaky']:
84                result[bool_name] = child.attrib['value'] == 'true'
85    return result
86
87
88def _extract_rules_from_bazel_xml(xml_tree):
89    """Extract bazel rules from an XML tree node obtained from "bazel query --output xml" command."""
90    result = {}
91    for child in xml_tree:
92        if child.tag == 'rule':
93            rule_dict = _rule_dict_from_xml_node(child)
94            rule_clazz = rule_dict['class']
95            rule_name = rule_dict['name']
96            if rule_clazz in [
97                    'cc_library',
98                    'cc_binary',
99                    'cc_test',
100                    'cc_proto_library',
101                    'proto_library',
102                    'upb_proto_library',
103                    'upb_proto_reflection_library',
104            ]:
105                if rule_name in result:
106                    raise Exception('Rule %s already present' % rule_name)
107                result[rule_name] = rule_dict
108    return result
109
110
111def _get_bazel_label(target_name: str) -> str:
112    if ':' in target_name:
113        return '//%s' % target_name
114    else:
115        return '//:%s' % target_name
116
117
118def _extract_source_file_path(label: str) -> str:
119    """Gets relative path to source file from bazel deps listing"""
120    if label.startswith('//'):
121        label = label[len('//'):]
122    # labels in form //:src/core/lib/surface/call_test_only.h
123    if label.startswith(':'):
124        label = label[len(':'):]
125    # labels in form //test/core/util:port.cc
126    label = label.replace(':', '/')
127    return label
128
129
130def _extract_public_headers(bazel_rule: BuildMetadata) -> List[str]:
131    """Gets list of public headers from a bazel rule"""
132    result = []
133    for dep in bazel_rule['hdrs']:
134        if dep.startswith('//:include/') and dep.endswith('.h'):
135            result.append(_extract_source_file_path(dep))
136    return list(sorted(result))
137
138
139def _extract_nonpublic_headers(bazel_rule: BuildMetadata) -> List[str]:
140    """Gets list of non-public headers from a bazel rule"""
141    result = []
142    for dep in bazel_rule['hdrs']:
143        if dep.startswith('//') and not dep.startswith(
144                '//:include/') and dep.endswith('.h'):
145            result.append(_extract_source_file_path(dep))
146    return list(sorted(result))
147
148
149def _extract_sources(bazel_rule: BuildMetadata) -> List[str]:
150    """Gets list of source files from a bazel rule"""
151    result = []
152    for dep in bazel_rule['srcs']:
153        if dep.startswith('//') and (dep.endswith('.cc') or dep.endswith('.c')
154                                     or dep.endswith('.proto')):
155            result.append(_extract_source_file_path(dep))
156    return list(sorted(result))
157
158
159def _extract_deps(bazel_rule: BuildMetadata,
160                  bazel_rules: BuildDict) -> List[str]:
161    """Gets list of deps from from a bazel rule"""
162    return list(sorted(bazel_rule['deps']))
163
164
def _create_target_from_bazel_rule(target_name: str,
                                   bazel_rules: BuildDict) -> BuildMetadata:
    """Create build.yaml-like target definition from bazel metadata"""
    bazel_rule = bazel_rules[_get_bazel_label(target_name)]

    # The original (non-collapsed) metadata from bazel is kept under
    # "private" underscore-prefixed keys, while the collapsed values computed
    # earlier become the public fields of the target.
    target = {'name': target_name}
    target['_PUBLIC_HEADERS_BAZEL'] = _extract_public_headers(bazel_rule)
    target['_HEADERS_BAZEL'] = _extract_nonpublic_headers(bazel_rule)
    target['_SRC_BAZEL'] = _extract_sources(bazel_rule)
    target['_DEPS_BAZEL'] = _extract_deps(bazel_rule, bazel_rules)
    target['public_headers'] = bazel_rule['_COLLAPSED_PUBLIC_HEADERS']
    target['headers'] = bazel_rule['_COLLAPSED_HEADERS']
    target['src'] = bazel_rule['_COLLAPSED_SRCS']
    target['deps'] = bazel_rule['_COLLAPSED_DEPS']
    return target
186
187
188def _external_dep_name_from_bazel_dependency(bazel_dep: str) -> Optional[str]:
189    """Returns name of dependency if external bazel dependency is provided or None"""
190    if bazel_dep.startswith('@com_google_absl//'):
191        # special case for add dependency on one of the absl libraries (there is not just one absl library)
192        prefixlen = len('@com_google_absl//')
193        return bazel_dep[prefixlen:]
194    elif bazel_dep == '//external:upb_lib':
195        return 'upb'
196    elif bazel_dep == '//external:benchmark':
197        return 'benchmark'
198    elif bazel_dep == '//external:libssl':
199        return 'libssl'
200    else:
201        # all the other external deps such as protobuf, cares, zlib
202        # don't need to be listed explicitly, they are handled automatically
203        # by the build system (make, cmake)
204        return None
205
206
def _compute_transitive_metadata(
        rule_name: str, bazel_rules: Any,
        bazel_label_to_dep_name: Dict[str, str]) -> None:
    """Computes the final build metadata for Bazel target with rule_name.

    The dependencies that will appear on the deps list are:

    * Public build targets including binaries and tests;
    * External targets, like absl, re2.

    All other intermediate dependencies will be merged, which means their
    source file, headers, etc. will be collected into one build target. This
    step of processing will greatly reduce the complexity of the generated
    build specifications for other build systems, like CMake, Make, setuptools.

    The final build metadata are:
    * _TRANSITIVE_DEPS: all the transitive dependencies including intermediate
                        targets;
    * _COLLAPSED_DEPS:  dependencies that fit our requirement above, with
                        duplicated items removed, producing the shortest
                        possible dependency list in alphabetical order;
    * _COLLAPSED_SRCS:  the merged source files;
    * _COLLAPSED_PUBLIC_HEADERS: the merged public headers;
    * _COLLAPSED_HEADERS: the merged non-public headers;
    * _EXCLUDE_DEPS: intermediate targets to exclude when performing collapsing
      of sources and dependencies.

    For the collapsed_deps, the algorithm improved cases like:

    The result in the past:
        end2end_tests -> [grpc_test_util, grpc, gpr, address_sorting, upb]
        grpc_test_util -> [grpc, gpr, address_sorting, upb, ...]
        grpc -> [gpr, address_sorting, upb, ...]

    The result of the algorithm:
        end2end_tests -> [grpc_test_util]
        grpc_test_util -> [grpc]
        grpc -> [gpr, address_sorting, upb, ...]
    """
    bazel_rule = bazel_rules[rule_name]
    direct_deps = _extract_deps(bazel_rule, bazel_rules)
    transitive_deps = set()
    collapsed_deps = set()
    exclude_deps = set()
    collapsed_srcs = set(_extract_sources(bazel_rule))
    collapsed_public_headers = set(_extract_public_headers(bazel_rule))
    collapsed_headers = set(_extract_nonpublic_headers(bazel_rule))

    for dep in direct_deps:
        external_dep_name_maybe = _external_dep_name_from_bazel_dependency(dep)

        if dep in bazel_rules:
            # Descend recursively, but no need to do that for external deps
            if external_dep_name_maybe is None:
                if "_PROCESSING_DONE" not in bazel_rules[dep]:
                    # This item is not processed before, compute now
                    _compute_transitive_metadata(dep, bazel_rules,
                                                 bazel_label_to_dep_name)

                transitive_deps.update(bazel_rules[dep].get(
                    '_TRANSITIVE_DEPS', []))
                collapsed_deps.update(bazel_rules[dep].get(
                    '_COLLAPSED_DEPS', []))
                exclude_deps.update(bazel_rules[dep].get('_EXCLUDE_DEPS', []))

        # This dep is a public target, add it as a dependency
        if dep in bazel_label_to_dep_name:
            transitive_deps.add(bazel_label_to_dep_name[dep])
            collapsed_deps.add(bazel_label_to_dep_name[dep])
            # Add all the transitive deps of our every public dep to exclude
            # list since we want to avoid building sources that are already
            # built by our dependencies
            exclude_deps.update(bazel_rules[dep]['_TRANSITIVE_DEPS'])
            continue

        # This dep is an external target, add it as a dependency
        if external_dep_name_maybe is not None:
            transitive_deps.add(external_dep_name_maybe)
            collapsed_deps.add(external_dep_name_maybe)
            continue

    # Direct dependencies are part of transitive dependencies
    transitive_deps.update(direct_deps)

    # Calculate transitive public deps (needed for collapsing sources)
    transitive_public_deps = set(
        filter(lambda x: x in bazel_label_to_dep_name, transitive_deps))

    # Remove intermediate targets that our public dependencies already depend
    # on. This is the step that further shortens the deps list.
    collapsed_deps = set(filter(lambda x: x not in exclude_deps,
                                collapsed_deps))

    # Compute the final source files and headers for this build target whose
    # name is `rule_name` (input argument of this function).
    #
    # Imagine a public target PX has transitive deps [IA, IB, PY, IC, PZ]. PX,
    # PY and PZ are public build targets. And IA, IB, IC are intermediate
    # targets. In addition, PY depends on IC.
    #
    # Translate the condition into dependency graph:
    #   PX -> [IA, IB, PY, IC, PZ]
    #   PY -> [IC]
    #   Public targets: [PX, PY, PZ]
    #
    # The collapsed dependencies of PX: [PY, PZ].
    # The excluded dependencies of X: [PY, IC, PZ].
    # (IC is excluded as a dependency of PX. It is already included in PY, hence
    # it would be redundant to include it again.)
    #
    # Target PX should include source files and headers of [PX, IA, IB] as final
    # build metadata.
    for dep in transitive_deps:
        if dep not in exclude_deps and dep not in transitive_public_deps:
            if dep in bazel_rules:
                collapsed_srcs.update(_extract_sources(bazel_rules[dep]))
                collapsed_public_headers.update(
                    _extract_public_headers(bazel_rules[dep]))
                collapsed_headers.update(
                    _extract_nonpublic_headers(bazel_rules[dep]))
    # This item is a "visited" flag that prevents reprocessing this rule
    bazel_rule['_PROCESSING_DONE'] = True
    # Following items are described in the docstring.
    bazel_rule['_TRANSITIVE_DEPS'] = list(sorted(transitive_deps))
    bazel_rule['_COLLAPSED_DEPS'] = list(sorted(collapsed_deps))
    bazel_rule['_COLLAPSED_SRCS'] = list(sorted(collapsed_srcs))
    bazel_rule['_COLLAPSED_PUBLIC_HEADERS'] = list(
        sorted(collapsed_public_headers))
    bazel_rule['_COLLAPSED_HEADERS'] = list(sorted(collapsed_headers))
    bazel_rule['_EXCLUDE_DEPS'] = list(sorted(exclude_deps))
338
339
340# TODO(jtattermusch): deduplicate with transitive_dependencies.py (which has a slightly different logic)
341# TODO(jtattermusch): This is done to avoid introducing too many intermediate
342# libraries into the build.yaml-based builds (which might in cause issues
343# building language-specific artifacts) and also because the libraries
344# in build.yaml-based build are generally considered units of distributions
345# (= public libraries that are visible to the user and are installable),
346# while in bazel builds it is customary to define larger number of smaller
347# "sublibraries". The need for elision (and expansion)
348# of intermediate libraries can be re-evaluated in the future.
349def _populate_transitive_metadata(bazel_rules: Any,
350                                  public_dep_names: Iterable[str]) -> None:
351    """Add 'transitive_deps' field for each of the rules"""
352    # Create the map between Bazel label and public dependency name
353    bazel_label_to_dep_name = {}
354    for dep_name in public_dep_names:
355        bazel_label_to_dep_name[_get_bazel_label(dep_name)] = dep_name
356
357    # Make sure we reached all the Bazel rules
358    # TODO(lidiz) potentially we could only update a subset of rules
359    for rule_name in bazel_rules:
360        if '_PROCESSING_DONE' not in bazel_rules[rule_name]:
361            _compute_transitive_metadata(rule_name, bazel_rules,
362                                         bazel_label_to_dep_name)
363
364
def update_test_metadata_with_transitive_metadata(
        all_extra_metadata: BuildDict, bazel_rules: BuildDict) -> None:
    """Patches test build metadata with transitive metadata."""
    for lib_name, lib_dict in all_extra_metadata.items():
        # only test targets are of interest here
        is_test_target = (lib_dict.get('build') == 'test' and
                          lib_dict.get('_TYPE') == 'target')
        if not is_test_target:
            continue

        transitive_deps = bazel_rules[_get_bazel_label(
            lib_name)]['_TRANSITIVE_DEPS']

        # tests that transitively depend on benchmark get benchmark defaults
        if '//external:benchmark' in transitive_deps:
            lib_dict['benchmark'] = True
            lib_dict['defaults'] = 'benchmark'

        # gtest-based tests are necessarily C++
        if '//external:gtest' in transitive_deps:
            lib_dict['gtest'] = True
            lib_dict['language'] = 'c++'
382
383
384def _expand_upb_proto_library_rules(bazel_rules):
385    # Expand the .proto files from UPB proto library rules into the pre-generated
386    # upb.h and upb.c files.
387    GEN_UPB_ROOT = '//:src/core/ext/upb-generated/'
388    GEN_UPBDEFS_ROOT = '//:src/core/ext/upbdefs-generated/'
389    EXTERNAL_LINKS = [
390        ('@com_google_protobuf//', ':src/'),
391    ]
392    for name, bazel_rule in bazel_rules.items():
393        gen_func = bazel_rule.get('generator_function', None)
394        if gen_func in ('grpc_upb_proto_library',
395                        'grpc_upb_proto_reflection_library'):
396            # get proto dependency
397            deps = bazel_rule['deps']
398            if len(deps) != 1:
399                raise Exception(
400                    'upb rule "{0}" should have 1 proto dependency but has "{1}"'
401                    .format(name, deps))
402            proto_dep = deps[0]
403            proto_rule = bazel_rules.get(proto_dep, None)
404            if proto_rule is None:
405                raise Exception(
406                    'upb rule "{0}"\'s dependency "{1}" is not found'.format(
407                        name, proto_rule))
408            # deps is not properly fetched from bazel query for upb_proto_library target
409            # so add the upb dependency manually
410            bazel_rule['deps'] = [
411                '//external:upb_lib', '//external:upb_lib_descriptor',
412                '//external:upb_generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me'
413            ]
414            # populate the upb_proto_library rule with pre-generated upb headers
415            # and sources using proto_rule
416            srcs = []
417            hdrs = []
418            for proto_src in proto_rule['srcs']:
419                for external_link in EXTERNAL_LINKS:
420                    if proto_src.startswith(external_link[0]):
421                        proto_src = proto_src[len(external_link[0]) +
422                                              len(external_link[1]):]
423                        break
424                proto_src = _extract_source_file_path(proto_src)
425                ext = '.upb' if gen_func == 'grpc_upb_proto_library' else '.upbdefs'
426                root = GEN_UPB_ROOT if gen_func == 'grpc_upb_proto_library' else GEN_UPBDEFS_ROOT
427                srcs.append(root + proto_src.replace('.proto', ext + '.c'))
428                hdrs.append(root + proto_src.replace('.proto', ext + '.h'))
429            bazel_rule['srcs'] = srcs
430            bazel_rule['hdrs'] = hdrs
431
432
433def _generate_build_metadata(build_extra_metadata: BuildDict,
434                             bazel_rules: BuildDict) -> BuildDict:
435    """Generate build metadata in build.yaml-like format bazel build metadata and build.yaml-specific "extra metadata"."""
436    lib_names = list(build_extra_metadata.keys())
437    result = {}
438
439    for lib_name in lib_names:
440        lib_dict = _create_target_from_bazel_rule(lib_name, bazel_rules)
441
442        # populate extra properties from the build.yaml-specific "extra metadata"
443        lib_dict.update(build_extra_metadata.get(lib_name, {}))
444
445        # store to results
446        result[lib_name] = lib_dict
447
448    # Rename targets marked with "_RENAME" extra metadata.
449    # This is mostly a cosmetic change to ensure that we end up with build.yaml target
450    # names we're used to from the past (and also to avoid too long target names).
451    # The rename step needs to be made after we're done with most of processing logic
452    # otherwise the already-renamed libraries will have different names than expected
453    for lib_name in lib_names:
454        to_name = build_extra_metadata.get(lib_name, {}).get('_RENAME', None)
455        if to_name:
456            # store lib under the new name and also change its 'name' property
457            if to_name in result:
458                raise Exception('Cannot rename target ' + str(lib_name) + ', ' +
459                                str(to_name) + ' already exists.')
460            lib_dict = result.pop(lib_name)
461            lib_dict['name'] = to_name
462            result[to_name] = lib_dict
463
464            # dep names need to be updated as well
465            for lib_dict_to_update in result.values():
466                lib_dict_to_update['deps'] = list([
467                    to_name if dep == lib_name else dep
468                    for dep in lib_dict_to_update['deps']
469                ])
470
471    return result
472
473
474def _convert_to_build_yaml_like(lib_dict: BuildMetadata) -> BuildYaml:
475    lib_names = [
476        lib_name for lib_name in list(lib_dict.keys())
477        if lib_dict[lib_name].get('_TYPE', 'library') == 'library'
478    ]
479    target_names = [
480        lib_name for lib_name in list(lib_dict.keys())
481        if lib_dict[lib_name].get('_TYPE', 'library') == 'target'
482    ]
483    test_names = [
484        lib_name for lib_name in list(lib_dict.keys())
485        if lib_dict[lib_name].get('_TYPE', 'library') == 'test'
486    ]
487
488    # list libraries and targets in predefined order
489    lib_list = [lib_dict[lib_name] for lib_name in lib_names]
490    target_list = [lib_dict[lib_name] for lib_name in target_names]
491    test_list = [lib_dict[lib_name] for lib_name in test_names]
492
493    # get rid of temporary private fields prefixed with "_" and some other useless fields
494    for lib in lib_list:
495        for field_to_remove in [k for k in lib.keys() if k.startswith('_')]:
496            lib.pop(field_to_remove, None)
497    for target in target_list:
498        for field_to_remove in [k for k in target.keys() if k.startswith('_')]:
499            target.pop(field_to_remove, None)
500        target.pop('public_headers',
501                   None)  # public headers make no sense for targets
502    for test in test_list:
503        for field_to_remove in [k for k in test.keys() if k.startswith('_')]:
504            test.pop(field_to_remove, None)
505        test.pop('public_headers',
506                 None)  # public headers make no sense for tests
507
508    build_yaml_like = {
509        'libs': lib_list,
510        'filegroups': [],
511        'targets': target_list,
512        'tests': test_list,
513    }
514    return build_yaml_like
515
516
517def _extract_cc_tests(bazel_rules: BuildDict) -> List[str]:
518    """Gets list of cc_test tests from bazel rules"""
519    result = []
520    for bazel_rule in bazel_rules.values():
521        if bazel_rule['class'] == 'cc_test':
522            test_name = bazel_rule['name']
523            if test_name.startswith('//'):
524                prefixlen = len('//')
525                result.append(test_name[prefixlen:])
526    return list(sorted(result))
527
528
529def _exclude_unwanted_cc_tests(tests: List[str]) -> List[str]:
530    """Filters out bazel tests that we don't want to run with other build systems or we cannot build them reasonably"""
531
532    # most qps tests are autogenerated, we are fine without them
533    tests = [test for test in tests if not test.startswith('test/cpp/qps:')]
534    # microbenchmarks aren't needed for checking correctness
535    tests = [
536        test for test in tests
537        if not test.startswith('test/cpp/microbenchmarks:')
538    ]
539    tests = [
540        test for test in tests
541        if not test.startswith('test/core/promise/benchmark:')
542    ]
543
544    # we have trouble with census dependency outside of bazel
545    tests = [
546        test for test in tests
547        if not test.startswith('test/cpp/ext/filters/census:') and
548        not test.startswith('test/core/xds:xds_channel_stack_modifier_test')
549    ]
550
551    # missing opencensus/stats/stats.h
552    tests = [
553        test for test in tests if not test.startswith(
554            'test/cpp/end2end:server_load_reporting_end2end_test')
555    ]
556    tests = [
557        test for test in tests if not test.startswith(
558            'test/cpp/server/load_reporter:lb_load_reporter_test')
559    ]
560
561    # The test uses --running_under_bazel cmdline argument
562    # To avoid the trouble needing to adjust it, we just skip the test
563    tests = [
564        test for test in tests if not test.startswith(
565            'test/cpp/naming:resolver_component_tests_runner_invoker')
566    ]
567
568    # the test requires 'client_crash_test_server' to be built
569    tests = [
570        test for test in tests
571        if not test.startswith('test/cpp/end2end:time_change_test')
572    ]
573
574    # the test requires 'client_crash_test_server' to be built
575    tests = [
576        test for test in tests
577        if not test.startswith('test/cpp/end2end:client_crash_test')
578    ]
579
580    # the test requires 'server_crash_test_client' to be built
581    tests = [
582        test for test in tests
583        if not test.startswith('test/cpp/end2end:server_crash_test')
584    ]
585
586    # test never existed under build.yaml and it fails -> skip it
587    tests = [
588        test for test in tests
589        if not test.startswith('test/core/tsi:ssl_session_cache_test')
590    ]
591
592    # the binary of this test does not get built with cmake
593    tests = [
594        test for test in tests
595        if not test.startswith('test/cpp/util:channelz_sampler_test')
596    ]
597
598    # we don't need to generate fuzzers outside of bazel
599    tests = [test for test in tests if not test.endswith('_fuzzer')]
600
601    return tests
602
603
604def _generate_build_extra_metadata_for_tests(
605        tests: List[str], bazel_rules: BuildDict) -> BuildDict:
606    """For given tests, generate the "extra metadata" that we need for our "build.yaml"-like output. The extra metadata is generated from the bazel rule metadata by using a bunch of heuristics."""
607    test_metadata = {}
608    for test in tests:
609        test_dict = {'build': 'test', '_TYPE': 'target'}
610
611        bazel_rule = bazel_rules[_get_bazel_label(test)]
612
613        bazel_tags = bazel_rule['tags']
614        if 'manual' in bazel_tags:
615            # don't run the tests marked as "manual"
616            test_dict['run'] = False
617
618        if bazel_rule['flaky']:
619            # don't run tests that are marked as "flaky" under bazel
620            # because that would only add noise for the run_tests.py tests
621            # and seeing more failures for tests that we already know are flaky
622            # doesn't really help anything
623            test_dict['run'] = False
624
625        if 'no_uses_polling' in bazel_tags:
626            test_dict['uses_polling'] = False
627
628        if 'grpc_fuzzer' == bazel_rule['generator_function']:
629            # currently we hand-list fuzzers instead of generating them automatically
630            # because there's no way to obtain maxlen property from bazel BUILD file.
631            print('skipping fuzzer ' + test)
632            continue
633
634        # if any tags that restrict platform compatibility are present,
635        # generate the "platforms" field accordingly
636        # TODO(jtattermusch): there is also a "no_linux" tag, but we cannot take
637        # it into account as it is applied by grpc_cc_test when poller expansion
638        # is made (for tests where uses_polling=True). So for now, we just
639        # assume all tests are compatible with linux and ignore the "no_linux" tag
640        # completely.
641        known_platform_tags = set(['no_windows', 'no_mac'])
642        if set(bazel_tags).intersection(known_platform_tags):
643            platforms = []
644            # assume all tests are compatible with linux and posix
645            platforms.append('linux')
646            platforms.append(
647                'posix')  # there is no posix-specific tag in bazel BUILD
648            if not 'no_mac' in bazel_tags:
649                platforms.append('mac')
650            if not 'no_windows' in bazel_tags:
651                platforms.append('windows')
652            test_dict['platforms'] = platforms
653
654        cmdline_args = bazel_rule['args']
655        if cmdline_args:
656            test_dict['args'] = list(cmdline_args)
657
658        if test.startswith('test/cpp'):
659            test_dict['language'] = 'c++'
660
661        elif test.startswith('test/core'):
662            test_dict['language'] = 'c'
663        else:
664            raise Exception('wrong test' + test)
665
666        # short test name without the path.
667        # There can be name collisions, but we will resolve them later
668        simple_test_name = os.path.basename(_extract_source_file_path(test))
669        test_dict['_RENAME'] = simple_test_name
670
671        test_metadata[test] = test_dict
672
673    # detect duplicate test names
674    tests_by_simple_name = {}
675    for test_name, test_dict in test_metadata.items():
676        simple_test_name = test_dict['_RENAME']
677        if not simple_test_name in tests_by_simple_name:
678            tests_by_simple_name[simple_test_name] = []
679        tests_by_simple_name[simple_test_name].append(test_name)
680
681    # choose alternative names for tests with a name collision
682    for collision_list in tests_by_simple_name.values():
683        if len(collision_list) > 1:
684            for test_name in collision_list:
685                long_name = test_name.replace('/', '_').replace(':', '_')
686                print(
687                    'short name of "%s" collides with another test, renaming to %s'
688                    % (test_name, long_name))
689                test_metadata[test_name]['_RENAME'] = long_name
690
691    return test_metadata
692
693
694def _detect_and_print_issues(build_yaml_like: BuildYaml) -> None:
695    """Try detecting some unusual situations and warn about them."""
696    for tgt in build_yaml_like['targets']:
697        if tgt['build'] == 'test':
698            for src in tgt['src']:
699                if src.startswith('src/') and not src.endswith('.proto'):
700                    print('source file from under "src/" tree used in test ' +
701                          tgt['name'] + ': ' + src)
702
703
# extra metadata that will be used to construct build.yaml
# there are mostly extra properties that we weren't able to obtain from the bazel build
# _TYPE: whether this is library, target or test
# _RENAME: whether this target should be renamed to a different name (to match expectations of make and cmake builds)
#
# Other per-target fields mirror properties of the legacy "build.yaml" format:
# - 'language': 'c' or 'c++' (determines how the generators treat the target)
# - 'build': which build configuration the target belongs to; values used
#   here are 'all', 'private', 'protoc' and 'test' ('tool' appears only in
#   the commented-out entries below)
# - 'baselib', 'generate_plugin_registry', 'defaults', 'run': forwarded into
#   the generated YAML; their exact semantics come from the legacy
#   build.yaml templates (see templates/README.md), not from this script.
_BUILD_EXTRA_METADATA = {
    'third_party/address_sorting:address_sorting': {
        'language': 'c',
        'build': 'all',
        '_RENAME': 'address_sorting'
    },
    'gpr': {
        'language': 'c',
        'build': 'all',
    },
    'grpc': {
        'language': 'c',
        'build': 'all',
        'baselib': True,
        'generate_plugin_registry': True
    },
    'grpc++': {
        'language': 'c++',
        'build': 'all',
        'baselib': True,
    },
    'grpc++_alts': {
        'language': 'c++',
        'build': 'all',
        'baselib': True
    },
    'grpc++_error_details': {
        'language': 'c++',
        'build': 'all'
    },
    'grpc++_reflection': {
        'language': 'c++',
        'build': 'all'
    },
    'grpc++_unsecure': {
        'language': 'c++',
        'build': 'all',
        'baselib': True,
    },
    # TODO(jtattermusch): do we need to set grpc_csharp_ext's LDFLAGS for wrapping memcpy in the same way as in build.yaml?
    'grpc_csharp_ext': {
        'language': 'c',
        'build': 'all',
    },
    'grpc_unsecure': {
        'language': 'c',
        'build': 'all',
        'baselib': True,
        'generate_plugin_registry': True
    },
    'grpcpp_channelz': {
        'language': 'c++',
        'build': 'all'
    },
    'grpc++_test': {
        'language': 'c++',
        'build': 'private',
    },
    # protoc plugins: these are binaries ('_TYPE': 'target') built in the
    # 'protoc' configuration rather than libraries.
    'src/compiler:grpc_plugin_support': {
        'language': 'c++',
        'build': 'protoc',
        '_RENAME': 'grpc_plugin_support'
    },
    'src/compiler:grpc_cpp_plugin': {
        'language': 'c++',
        'build': 'protoc',
        '_TYPE': 'target',
        '_RENAME': 'grpc_cpp_plugin'
    },
    'src/compiler:grpc_csharp_plugin': {
        'language': 'c++',
        'build': 'protoc',
        '_TYPE': 'target',
        '_RENAME': 'grpc_csharp_plugin'
    },
    'src/compiler:grpc_node_plugin': {
        'language': 'c++',
        'build': 'protoc',
        '_TYPE': 'target',
        '_RENAME': 'grpc_node_plugin'
    },
    'src/compiler:grpc_objective_c_plugin': {
        'language': 'c++',
        'build': 'protoc',
        '_TYPE': 'target',
        '_RENAME': 'grpc_objective_c_plugin'
    },
    'src/compiler:grpc_php_plugin': {
        'language': 'c++',
        'build': 'protoc',
        '_TYPE': 'target',
        '_RENAME': 'grpc_php_plugin'
    },
    'src/compiler:grpc_python_plugin': {
        'language': 'c++',
        'build': 'protoc',
        '_TYPE': 'target',
        '_RENAME': 'grpc_python_plugin'
    },
    'src/compiler:grpc_ruby_plugin': {
        'language': 'c++',
        'build': 'protoc',
        '_TYPE': 'target',
        '_RENAME': 'grpc_ruby_plugin'
    },

    # TODO(jtattermusch): consider adding grpc++_core_stats

    # test support libraries
    'test/core/util:grpc_test_util': {
        'language': 'c',
        'build': 'private',
        '_RENAME': 'grpc_test_util'
    },
    'test/core/util:grpc_test_util_unsecure': {
        'language': 'c',
        'build': 'private',
        '_RENAME': 'grpc_test_util_unsecure'
    },
    # TODO(jtattermusch): consider adding grpc++_test_util_unsecure - it doesn't seem to be used by bazel build (don't forget to set secure: False)
    'test/cpp/util:test_config': {
        'language': 'c++',
        'build': 'private',
        '_RENAME': 'grpc++_test_config'
    },
    'test/cpp/util:test_util': {
        'language': 'c++',
        'build': 'private',
        '_RENAME': 'grpc++_test_util'
    },

    # end2end test support libraries
    'test/core/end2end:end2end_tests': {
        'language': 'c',
        'build': 'private',
        '_RENAME': 'end2end_tests'
    },
    'test/core/end2end:end2end_nosec_tests': {
        'language': 'c',
        'build': 'private',
        '_RENAME': 'end2end_nosec_tests'
    },

    # benchmark support libraries
    'test/cpp/microbenchmarks:helpers': {
        'language': 'c++',
        'build': 'test',
        'defaults': 'benchmark',
        '_RENAME': 'benchmark_helpers'
    },
    # binaries below are built as part of the test suite but are not run as
    # tests themselves ('run': False) — they are drivers/servers invoked by
    # other test infrastructure.
    'test/cpp/interop:interop_client': {
        'language': 'c++',
        'build': 'test',
        'run': False,
        '_TYPE': 'target',
        '_RENAME': 'interop_client'
    },
    'test/cpp/interop:interop_server': {
        'language': 'c++',
        'build': 'test',
        'run': False,
        '_TYPE': 'target',
        '_RENAME': 'interop_server'
    },
    'test/cpp/interop:xds_interop_client': {
        'language': 'c++',
        'build': 'test',
        'run': False,
        '_TYPE': 'target',
        '_RENAME': 'xds_interop_client'
    },
    'test/cpp/interop:xds_interop_server': {
        'language': 'c++',
        'build': 'test',
        'run': False,
        '_TYPE': 'target',
        '_RENAME': 'xds_interop_server'
    },
    'test/cpp/interop:http2_client': {
        'language': 'c++',
        'build': 'test',
        'run': False,
        '_TYPE': 'target',
        '_RENAME': 'http2_client'
    },
    'test/cpp/qps:qps_json_driver': {
        'language': 'c++',
        'build': 'test',
        'run': False,
        '_TYPE': 'target',
        '_RENAME': 'qps_json_driver'
    },
    'test/cpp/qps:qps_worker': {
        'language': 'c++',
        'build': 'test',
        'run': False,
        '_TYPE': 'target',
        '_RENAME': 'qps_worker'
    },
    'test/cpp/util:grpc_cli': {
        'language': 'c++',
        'build': 'test',
        'run': False,
        '_TYPE': 'target',
        '_RENAME': 'grpc_cli'
    },

    # TODO(jtattermusch): create_jwt and verify_jwt breaks distribtests because it depends on grpc_test_utils and thus requires tests to be built
    # For now it's ok to disable them as these binaries aren't very useful anyway.
    #'test/core/security:create_jwt': { 'language': 'c', 'build': 'tool', '_TYPE': 'target', '_RENAME': 'grpc_create_jwt' },
    #'test/core/security:verify_jwt': { 'language': 'c', 'build': 'tool', '_TYPE': 'target', '_RENAME': 'grpc_verify_jwt' },

    # TODO(jtattermusch): add remaining tools such as grpc_print_google_default_creds_token (they are not used by bazel build)

    # TODO(jtattermusch): these fuzzers had no build.yaml equivalent
    # test/core/compression:message_compress_fuzzer
    # test/core/compression:message_decompress_fuzzer
    # test/core/compression:stream_compression_fuzzer
    # test/core/compression:stream_decompression_fuzzer
    # test/core/slice:b64_decode_fuzzer
    # test/core/slice:b64_encode_fuzzer
}
930
# We need a complete picture of all the targets and dependencies we're interested in
# so we run multiple bazel queries and merge the results.
# Each entry is a bazel query expression; the per-query results are merged
# into a single rule dictionary (a rule appearing in several query results
# is simply overwritten with the same data, since dict.update is used).
_BAZEL_DEPS_QUERIES = [
    'deps("//test/...")',
    'deps("//:all")',
    'deps("//src/compiler/...")',
    'deps("//src/proto/...")',
    # The ^ is needed to differentiate proto_library from go_proto_library
    'deps(kind("^proto_library", @envoy_api//envoy/...))',
]
941
# Step 1: run a bunch of "bazel query --output xml" queries to collect
# the raw build metadata from the bazel build.
# At the end of this step we will have a dictionary of bazel rules
# that are interesting to us (libraries, binaries, etc.) along
# with their most important metadata (sources, headers, dependencies)
#
# Example of a single bazel rule after being populated:
# '//:grpc' : { 'class': 'cc_library',
#               'hdrs': ['//:include/grpc/byte_buffer.h', ... ],
#               'srcs': ['//:src/core/lib/surface/init.cc', ... ],
#               'deps': ['//:grpc_common', ...],
#               ... }
bazel_rules = {}
for query in _BAZEL_DEPS_QUERIES:
    bazel_rules.update(
        _extract_rules_from_bazel_xml(_bazel_query_xml_tree(query)))

# Step 1.5: The sources for UPB protos are pre-generated, so we want
# to expand the UPB proto library bazel rules into the generated
# .upb.h and .upb.c files.
_expand_upb_proto_library_rules(bazel_rules)

# Step 2: Extract the known bazel cc_test tests. While most tests
# will be buildable with other build systems just fine, some of these tests
# would be too difficult to build and run with other build systems,
# so we simply exclude the ones we don't want.
# Note that while making tests buildable with other build systems
# than just bazel is extra effort, we still need to do that for these
# reasons:
# - If our cmake build doesn't have any tests at all, it's hard to make
#   sure that what it built actually works (we need at least some "smoke tests").
#   This is quite important because the build flags between bazel / non-bazel flag might differ
#   (sometimes it's for interesting reasons that are not easy to overcome)
#   which makes it even more important to have at least some tests for cmake/make
# - Our portability suite actually runs cmake tests and migration of portability
#   suite fully towards bazel might be intricate (e.g. it's unclear whether it's
#   possible to get a good enough coverage of different compilers / distros etc.
#   with bazel)
# - some things that are considered "tests" in build.yaml-based builds are actually binaries
#   we'd want to be able to build anyway (qps_json_worker, interop_client, interop_server, grpc_cli)
#   so it's unclear how much make/cmake simplification we would gain by removing just some (but not all) test
# TODO(jtattermusch): Investigate feasibility of running portability suite with bazel.
tests = _exclude_unwanted_cc_tests(_extract_cc_tests(bazel_rules))

# Step 3: Generate the "extra metadata" for all our build targets.
# While the bazel rules give us most of the information we need,
# the legacy "build.yaml" format requires some additional fields that
# we cannot get just from bazel alone (we call that "extra metadata").
# In this step, we basically analyze the build metadata we have from bazel
# and use heuristics to determine (and sometimes guess) the right
# extra metadata to use for each target.
#
# - For some targets (such as the public libraries, helper libraries
#   and executables) determining the right extra metadata is hard to do
#   automatically. For these targets, the extra metadata is supplied "manually"
#   in form of the _BUILD_EXTRA_METADATA dictionary. That allows us to match
#   the semantics of the legacy "build.yaml" as closely as possible.
#
# - For test binaries, it is possible to generate the "extra metadata" mostly
#   automatically using a rule-based heuristic approach because most tests
#   look and behave alike from the build's perspective.
#
# TODO(jtattermusch): Of course neither "_BUILD_EXTRA_METADATA" or
# the heuristic approach used for tests are ideal and they cannot be made
# to cover all possible situations (and are tailored to work with the way
# the grpc build currently works), but the idea was to start with something
# reasonably simple that matches the "build.yaml"-like semantics as closely
# as possible (to avoid changing too many things at once) and gradually get
# rid of the legacy "build.yaml"-specific fields one by one. Once that is done,
# only very little "extra metadata" would be needed and/or it would be trivial
# to generate it automatically.
all_extra_metadata = {}
# Manually curated metadata first, then the auto-generated test metadata
# (tests never collide with the manual entries, so the order is not
# semantically significant).
all_extra_metadata.update(_BUILD_EXTRA_METADATA)
all_extra_metadata.update(
    _generate_build_extra_metadata_for_tests(tests, bazel_rules))

# Step 4: Compute the build metadata that will be used in the final build.yaml.
# The final build metadata includes transitive dependencies, and sources/headers
# expanded without intermediate dependencies.
# Example:
# '//:grpc' : { ...,
#               '_TRANSITIVE_DEPS': ['//:gpr_base', ...],
#               '_COLLAPSED_DEPS': ['gpr', ...],
#               '_COLLAPSED_SRCS': [...],
#               '_COLLAPSED_PUBLIC_HEADERS': [...],
#               '_COLLAPSED_HEADERS': [...]
#             }
_populate_transitive_metadata(bazel_rules, all_extra_metadata.keys())

# Step 4a: Update the existing test metadata with the updated build metadata.
# Certain build metadata of certain test targets depend on the transitive
# metadata that wasn't available earlier.
update_test_metadata_with_transitive_metadata(all_extra_metadata, bazel_rules)

# Step 5: Generate the final metadata for all the targets.
# This is done by combining the bazel build metadata and the "extra metadata"
# we obtained in the previous step.
# In this step, we also perform some interesting massaging of the target metadata
# to end up with a result that is as similar to the legacy build.yaml data
# as possible.
# - Some targets get renamed (to match the legacy build.yaml target names)
# - Some intermediate libraries get elided ("expanded") to better match the set
#   of targets provided by the legacy build.yaml build
#
# Originally the target renaming was introduced to address these concerns:
# - avoid changing too many things at the same time and avoid people getting
#   confused by some well know targets suddenly being missing
# - Makefile/cmake and also language-specific generators rely on some build
#   targets being called exactly the way they are. Some of our testing
#   scripts also invoke executables (e.g. "qps_json_driver") by their name.
# - The autogenerated test name from bazel includes the package path
#   (e.g. "test_cpp_TEST_NAME"). Without renaming, the target names would
#   end up pretty ugly (e.g. test_cpp_qps_qps_json_driver).
# TODO(jtattermusch): reevaluate the need for target renaming in the future.
#
# Example of a single generated target:
# 'grpc' : { 'language': 'c',
#            'public_headers': ['include/grpc/byte_buffer.h', ... ],
#            'headers': ['src/core/ext/filters/client_channel/client_channel.h', ... ],
#            'src': ['src/core/lib/surface/init.cc', ... ],
#            'deps': ['gpr', 'address_sorting', ...],
#            ... }
all_targets_dict = _generate_build_metadata(all_extra_metadata, bazel_rules)

# Step 6: convert the dictionary with all the targets to a dict that has
# the desired "build.yaml"-like layout.
# TODO(jtattermusch): We use the custom "build.yaml"-like layout because
# currently all other build systems use that format as their source of truth.
# In the future, we can get rid of this custom & legacy format entirely,
# but we would need to update the generators for other build systems
# at the same time.
#
# Layout of the result:
# { 'libs': { TARGET_DICT_FOR_LIB_XYZ, ... },
#   'targets': { TARGET_DICT_FOR_BIN_XYZ, ... },
#   'tests': { TARGET_DICT_FOR_TEST_XYZ, ...} }
build_yaml_like = _convert_to_build_yaml_like(all_targets_dict)

# detect and report some suspicious situations we've seen before
_detect_and_print_issues(build_yaml_like)

# Step 7: Store the build_autogenerated.yaml in a deterministic (=sorted)
# and cleaned-up form.
# A basic overview of the resulting "build.yaml"-like format is here:
# https://github.com/grpc/grpc/blob/master/templates/README.md
# TODO(jtattermusch): The "cleanup" function is taken from the legacy
# build system (which used build.yaml) and can be eventually removed.
build_yaml_string = build_cleaner.cleaned_build_yaml_dict_as_string(
    build_yaml_like)
# Write with an explicit encoding so the generated file's bytes do not
# depend on the platform's locale settings.
with open('build_autogenerated.yaml', 'w', encoding='utf-8') as out_file:
    out_file.write(build_yaml_string)