1#!/usr/bin/env python
2
3"""
4Command line utility for determining what jstests have been added or modified
5"""
6
7from __future__ import absolute_import
8
9import collections
10import copy
11import json
12import optparse
13import os.path
14import subprocess
15import re
16import requests
17import shlex
18import sys
19import urlparse
20import yaml
21
22
23# Get relative imports to work when the package is not installed on the PYTHONPATH.
24if __name__ == "__main__" and __package__ is None:
25    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
26from buildscripts import resmokelib
27from buildscripts.ciconfig import evergreen
28
29
# Evergreen server that is queried when no local Evergreen configuration file provides an
# "api_server_host" setting.
API_SERVER_DEFAULT = "https://evergreen.mongodb.com"
31
32
def parse_command_line(argv=None):
    """
    Parses the burn_in_tests command line.

    Option parsing stops at the first positional argument, so everything from that point on is
    returned untouched as the resmoke.py command line to run.

    'argv' is the list of arguments to parse. When it is None (the default), sys.argv[1:] is
    parsed.

    Returns a (values, args) tuple: 'values' holds the parsed options and 'args' is the list of
    remaining arguments, which is empty when no resmoke.py command was given.
    """
    parser = optparse.OptionParser(usage="Usage: %prog [options] [resmoke command]")

    # type="int" makes a value given on the command line numeric, like the default, so that it
    # can be compared against a revision count.
    parser.add_option("--maxRevisions", dest="max_revisions", type="int",
                      help="Maximum number of revisions to check for changes. Default is 25.")

    parser.add_option("--branch", dest="branch",
                      help="The name of the branch the working branch was based on.")

    parser.add_option("--baseCommit", dest="base_commit",
                      help="The base commit to compare to for determining changes.")

    # Multi-line help texts are built from adjacent string literals; a backslash continuation
    # inside a literal would embed the indentation of the next source line in the text.
    parser.add_option("--buildVariant", dest="buildvariant",
                      help=("The buildvariant the tasks will execute on. "
                            "Required when generating the JSON file with test executor"
                            " information"))

    parser.add_option("--checkEvergreen", dest="check_evergreen", action="store_true",
                      help=("Checks Evergreen for the last commit that was scheduled. "
                            "This way all the tests that haven't been burned in will be run."))

    parser.add_option("--noExec", dest="no_exec", action="store_true",
                      help="Do not run resmoke loop on new tests.")

    parser.add_option("--reportFile", dest="report_file",
                      help="Write a JSON file with test results.")

    parser.add_option("--testListFile", dest="test_list_file", metavar="TESTLIST",
                      help="Load a JSON file with tests to run.")

    parser.add_option("--testListOutfile", dest="test_list_outfile",
                      help="Write a JSON file with test executor information.")

    # The executor_file and suite_files defaults are required to make the
    # suite resolver work correctly.
    parser.set_defaults(base_commit=None,
                        branch="master",
                        buildvariant=None,
                        check_evergreen=False,
                        evergreen_file="etc/evergreen.yml",
                        selector_file="etc/burn_in_tests.yml",
                        max_revisions=25,
                        no_exec=False,
                        executor_file=None,
                        report_file="report.json",
                        suite_files="with_server",
                        test_list_file=None,
                        test_list_outfile=None)

    # This disables argument parsing on the first unrecognized parameter. This allows us to pass
    # a complete resmoke.py command line without accidentally parsing its options.
    parser.disable_interspersed_args()

    return parser.parse_args(argv)
86
87
def callo(args):
    """Run the command described by 'args' and hand back what it wrote to standard output.

    A command that exits with a non-zero status makes subprocess.check_output raise.
    """
    captured = subprocess.check_output(args)
    return captured
92
93
def read_evg_config():
    """
    Loads the first Evergreen configuration file that exists.

    The candidates are checked in this order: ./.evergreen.yml, ~/.evergreen.yml and
    ~/cli_bin/.evergreen.yml. Returns the parsed YAML document of the first one found, or None
    when none of them exists.
    """
    # Expand out evergreen config file possibilities
    file_list = [
        "./.evergreen.yml",
        os.path.expanduser("~/.evergreen.yml"),
        os.path.expanduser("~/cli_bin/.evergreen.yml")]

    for filename in file_list:
        if os.path.isfile(filename):
            with open(filename, "r") as fstream:
                # NOTE: this used to be yaml.load(). safe_load() only builds plain YAML data
                # types, so a crafted configuration file cannot make the parser construct
                # arbitrary Python objects.
                return yaml.safe_load(fstream)
    return None
106
107
def find_last_activated_task(revisions, variant, branch_name):
    """
    Get the git hash of the most recently activated build before this one.

    'revisions' is the sequence of git hashes to look up in Evergreen, in the order in which
    they are checked. 'variant' is the buildvariant name and 'branch_name' selects the Evergreen
    project ("mongodb-mongo-<branch_name>").

    The Evergreen server is taken from the "api_server_host" entry of the local Evergreen
    configuration file when there is one, and from API_SERVER_DEFAULT otherwise.

    Returns the revision of the first activated build found for the variant, or None when no
    revision has one.
    """
    rest_prefix = "/rest/v1/"
    project = "mongodb-mongo-" + branch_name
    build_prefix = "mongodb_mongo_" + branch_name + "_" + variant.replace('-', '_')

    evg_cfg = read_evg_config()
    if evg_cfg is not None and "api_server_host" in evg_cfg:
        # Keep only the scheme and host of the configured URL.
        api_server = "{url.scheme}://{url.netloc}".format(
            url=urlparse.urlparse(evg_cfg["api_server_host"]))
    else:
        api_server = API_SERVER_DEFAULT

    api_prefix = api_server + rest_prefix

    for githash in revisions:
        response = requests.get(api_prefix + "projects/" + project + "/revisions/" + githash)
        revision_data = response.json()

        try:
            for build in revision_data["builds"]:
                if build.startswith(build_prefix):
                    build_resp = requests.get(api_prefix + "builds/" + build)
                    build_data = build_resp.json()
                    if build_data["activated"]:
                        return build_data["revision"]
        except (KeyError, TypeError, ValueError, AttributeError, requests.RequestException):
            # Sometimes build data is incomplete, as was the related build. Skip this revision
            # and move on to the next one. Only the errors that missing or malformed data can
            # cause are handled here, so KeyboardInterrupt and SystemExit still propagate.
            continue

    return None
139
140
def find_changed_tests(branch_name, base_commit, max_revisions, buildvariant, check_evergreen):
    """
    Use git to find which files have changed in this patch.
    TODO: This should be expanded to search for enterprise modules.
    The returned file paths are in normalized form (see os.path.normpath(path)).

    'branch_name' is the branch the working branch was based on. 'base_commit' is the commit to
    compare against; when it is None, the merge base of the upstream of 'branch_name' and HEAD
    is used. 'max_revisions' is the largest number of revisions that will be reviewed.
    'buildvariant' and 'check_evergreen' control the optional lookup of the last revision that
    was activated in Evergreen.

    Returns the list of existing .js files whose path contains "jstests" and that are either
    changed relative to the comparison commit or untracked. The list is empty when more than
    'max_revisions' revisions are involved.
    """
    changed_tests = []

    if base_commit is None:
        base_commit = callo(["git", "merge-base", branch_name + "@{upstream}", "HEAD"]).rstrip()
    if check_evergreen:
        # We're going to check up to 200 commits in Evergreen for the last scheduled one.
        # The current commit will be activated in Evergreen; we use --skip to start at the
        # previous commit when trying to find the most recent preceding commit that has been
        # activated.
        revs_to_check = callo(["git", "rev-list", base_commit,
                               "--max-count=200", "--skip=1"]).splitlines()
        last_activated = find_last_activated_task(revs_to_check, buildvariant, branch_name)
        if last_activated is None:
            # When the current commit is the first time 'buildvariant' has run, there won't be a
            # commit among 'revs_to_check' that's been activated in Evergreen. We handle this by
            # only considering tests changed in the current commit.
            last_activated = "HEAD"
        print("Comparing current branch against %s" % last_activated)
        revisions = callo(["git", "rev-list", base_commit + "..." + last_activated]).splitlines()
        base_commit = last_activated
    else:
        revisions = callo(["git", "rev-list", base_commit + "...HEAD"]).splitlines()

    # The limit may arrive as a string when it is taken verbatim from the command line. Compare
    # numerically so that the check cannot be bypassed by a string/number comparison.
    max_revisions = int(max_revisions)
    revision_count = len(revisions)
    if revision_count > max_revisions:
        # Report the limit that was actually applied rather than a fixed number.
        print("There are too many revisions included (%d). "
              "This is likely because your base branch is not %s. "
              "You can allow us to review more than %d revisions by using "
              "the --maxRevisions option." % (revision_count, branch_name, max_revisions))
        return changed_tests

    changed_files = callo(["git", "diff", "--name-only", base_commit]).splitlines()
    # New files ("untracked" in git terminology) won't show up in the git diff results.
    untracked_files = callo(["git", "status", "--porcelain"]).splitlines()

    # The lines with untracked files start with '?? '.
    for line in untracked_files:
        if line.startswith("?"):
            # Drop the status marker and keep only the path.
            changed_files.append(line.split(" ", 1)[1])

    for line in changed_files:
        line = line.rstrip()
        # Check that the file exists because it may have been moved or deleted in the patch.
        if os.path.splitext(line)[1] != ".js" or not os.path.isfile(line):
            continue
        if "jstests" in line:
            changed_tests.append(os.path.normpath(line))
    return changed_tests
197
198
def find_exclude_tests(selector_file):
    """
    Parses etc/burn_in_tests.yml. Returns lists of excluded suites, tasks & tests.

    'selector_file' is the path of the YAML selector file. When it is empty or None, three empty
    lists are returned without reading anything.

    Returns a tuple (exclude_suites, exclude_tasks, exclude_tests) read from the
    'selector.js_test' section; an entry that is absent or has no value becomes an empty list.

    Raises Exception when the file does not contain the 'selector.js_test' key.
    """

    if not selector_file:
        return ([], [], [])

    with open(selector_file, "r") as fstream:
        # NOTE: this used to be yaml.load(). safe_load() only builds plain YAML data types, so
        # the selector file cannot make the parser construct arbitrary Python objects.
        yml = yaml.safe_load(fstream)

    try:
        js_test = yml['selector']['js_test']
    except (KeyError, TypeError):
        # TypeError covers an empty file (the document is None) and a 'selector' entry that is
        # not a mapping; both get the same descriptive error as a missing key.
        raise Exception("The selector file " + selector_file +
                        " is missing the 'selector.js_test' key")

    return (resmokelib.utils.default_if_none(js_test.get("exclude_suites"), []),
            resmokelib.utils.default_if_none(js_test.get("exclude_tasks"), []),
            resmokelib.utils.default_if_none(js_test.get("exclude_tests"), []))
219
220
def filter_tests(tests, exclude_tests):
    """
    Drops the blacklisted tests from 'tests'.

    'tests' is a list of test paths, i.e., ['jstests/core/a.js'], which must be in normalized
    form (see os.path.normpath(path)). 'exclude_tests' is a list of patterns that may use * and
    ** to describe files and directories.

    When either argument is empty, 'tests' is returned as is. Otherwise the result is the set of
    tests that none of the expanded patterns matched.
    """

    if exclude_tests and tests:
        # Expand every pattern into the concrete paths it matches.
        blacklisted = set()
        for pattern in exclude_tests:
            blacklisted.update(resmokelib.utils.globstar.iglob(pattern))
        return set(tests) - blacklisted

    return tests
237
238
def find_tests_by_executor(suites):
    """
    Maps every test of the given suites to its entry in the resmoke test membership map.

    Returns a dict keyed by test name whose value is whatever the membership map holds for that
    test (the names of the suites that run it).
    """

    suites_by_test = resmokelib.parser.create_test_membership_map()
    return {test: suites_by_test[test] for suite in suites for test in suite.tests}
251
252
def create_executor_list(suites, exclude_suites):
    """
    Groups the tests of the given suites by the resmoke suites (executors) that run them.

    The suites running a test are taken from the resmoke test membership map; those listed in
    'exclude_suites' are left out. Returns a dict keyed by suite name / executor whose value is
    the list of tests to run under that executor.
    """

    suites_by_test = resmokelib.parser.create_test_membership_map()
    tests_by_executor = collections.defaultdict(list)
    for suite in suites:
        for test in suite.tests:
            allowed_executors = set(suites_by_test[test]) - set(exclude_suites)
            for executor in allowed_executors:
                tests_by_executor[executor].append(test)
    return tests_by_executor
267
268
def create_task_list(evergreen_conf, buildvariant, suites, exclude_tasks):
    """
    Finds associated tasks for the specified buildvariant and suites.

    'evergreen_conf' is the Evergreen project configuration, 'buildvariant' the name of the
    variant to look up in it, 'suites' a dict mapping a suite name to the tests to run under it
    and 'exclude_tasks' the names of the tasks to leave out.

    A task is selected for a suite when its combined resmoke arguments begin with
    '--suites=<suite>' followed by whitespace or the end of the string.

    Returns a dict keyed by task_name, with resmoke_args & tests, i.e.,
    {'jsCore_small_oplog':
        {'resmoke_args': '--suites=core_small_oplog --storageEngine=mmapv1',
         'tests': ['jstests/core/all2.js', 'jstests/core/all3.js']}
    }

    Exits the program with status 1 when the buildvariant is not found in the configuration.
    """

    evg_buildvariant = evergreen_conf.get_variant(buildvariant)
    if not evg_buildvariant:
        print("Buildvariant %s not found in %s" % (buildvariant, evergreen_conf.path))
        sys.exit(1)

    # Find all the buildvariant task's resmoke_args.
    variant_task_args = {}
    exclude_tasks_set = set(exclude_tasks)
    for task in evg_buildvariant.tasks:
        if task.name in exclude_tasks_set:
            continue
        # Using 'task.combined_resmoke_args' to include the variant's test_flags and
        # allow the storage engine to be overridden.
        resmoke_args = task.combined_resmoke_args
        if resmoke_args:
            variant_task_args[task.name] = resmoke_args

    # Create the list of tasks to run for the specified suite.
    tasks_to_run = {}
    for suite, tests in suites.items():
        # The suite name is matched literally: re.escape() keeps characters such as '.' from
        # being interpreted as regular expression syntax. The pattern is compiled once per suite
        # rather than once per task.
        suite_arg = re.compile(r"--suites=" + re.escape(suite) + r"(?:\s+|$)")
        for task_name, task_arg in variant_task_args.items():
            # Find the resmoke_args for matching suite names.
            if suite_arg.match(task_arg):
                tasks_to_run[task_name] = {
                    "resmoke_args": task_arg,
                    "tests": tests
                }

    return tasks_to_run
308
309
def _write_report_file(tests_by_executor, pathname):
    """
    Serializes the tests_by_executor dict as JSON into the file at 'pathname', replacing
    whatever the file held before.  This should be done during the compile task when the git
    repo is available.
    """
    with open(pathname, "w") as report_stream:
        json.dump(tests_by_executor, report_stream)
317
318
def _load_tests_file(pathname):
    """
    Reads the JSON document with the tests and executors stored at 'pathname'.

    A missing file is not an error: None is returned instead, which turns the task running
    this into a nop.
    """
    if os.path.isfile(pathname):
        with open(pathname, "r") as tests_stream:
            return json.load(tests_stream)
    return None
328
329
def _save_report_data(saved_data, pathname, task):
    """
    Folds the report file left by the previous resmoke.py run into the saved_data dict.

    Every result read from 'pathname' gets ":<task>" appended to its "test_file" entry; the
    "failures" count and the "results" list of the report are then added onto the ones in
    'saved_data'. Nothing happens when the report file does not exist. Always returns None.
    """
    if os.path.isfile(pathname):
        with open(pathname, "r") as report_stream:
            latest_report = json.load(report_stream)

        # Tag each result with the task it ran under.
        for entry in latest_report["results"]:
            entry["test_file"] = entry["test_file"] + ":" + task

        saved_data["failures"] += latest_report["failures"]
        saved_data["results"] += latest_report["results"]

    return None
345
346
def main():
    """
    Entry point of the tool: determines the tests to burn in and runs them through resmoke.py.

    The tests are either loaded from the file given with --testListFile or computed from the
    changed jstests, the selector file and the Evergreen project configuration. Unless --noExec
    is given, the resmoke.py command is then run once per task and the results of the
    individual runs are accumulated in the report file.

    The function always ends by calling sys.exit(): with 0 on success or when there is nothing
    to run, with 1 when the buildvariant is missing, and with the exit status of resmoke.py
    when a run fails.
    """
    values, args = parse_command_line()

    # If a resmoke.py command wasn't passed in, use a simple version.
    if not args:
        args = ["python", "buildscripts/resmoke.py", "--repeat=2"]

    # Load the dict of tests to run.
    if values.test_list_file:
        tests_by_task = _load_tests_file(values.test_list_file)
        # If there are no tests to run, carry on.
        if tests_by_task is None:
            test_results = {"failures": 0, "results": []}
            _write_report_file(test_results, values.report_file)
            sys.exit(0)

    # Run the executor finder.
    else:
        # Parse the Evergreen project configuration file.
        evergreen_conf = evergreen.EvergreenProjectConfig(values.evergreen_file)

        if values.buildvariant is None:
            print("Option buildVariant must be specified to find changed tests.\n"
                  "Select from the following: \n"
                  "\t" + "\n\t".join(sorted(evergreen_conf.variant_names)))
            sys.exit(1)

        changed_tests = find_changed_tests(values.branch,
                                           values.base_commit,
                                           values.max_revisions,
                                           values.buildvariant,
                                           values.check_evergreen)
        exclude_suites, exclude_tasks, exclude_tests = find_exclude_tests(values.selector_file)
        changed_tests = filter_tests(changed_tests, exclude_tests)
        # If there are no changed tests, exit cleanly.
        if not changed_tests:
            print("No new or modified tests found.")
            # --testListOutfile is optional and defaults to None, so only write the (empty)
            # test list when a file name was actually given.
            if values.test_list_outfile is not None:
                _write_report_file({}, values.test_list_outfile)
            sys.exit(0)
        suites = resmokelib.parser.get_suites(values, changed_tests)
        tests_by_executor = create_executor_list(suites, exclude_suites)
        tests_by_task = create_task_list(evergreen_conf,
                                         values.buildvariant,
                                         tests_by_executor,
                                         exclude_tasks)
        if values.test_list_outfile is not None:
            _write_report_file(tests_by_task, values.test_list_outfile)

    # If we're not in noExec mode, run the tests.
    if not values.no_exec:
        test_results = {"failures": 0, "results": []}

        for task in sorted(tests_by_task):
            # Work on a copy so that the arguments of one task don't leak into the next one.
            resmoke_cmd = copy.deepcopy(args)
            resmoke_cmd.extend(shlex.split(tests_by_task[task]["resmoke_args"]))
            resmoke_cmd.extend(tests_by_task[task]["tests"])
            try:
                subprocess.check_call(resmoke_cmd, shell=False)
            except subprocess.CalledProcessError as err:
                print("Resmoke returned an error with task: %s" % task)
                # Keep the results gathered so far before giving up with resmoke's status.
                _save_report_data(test_results, values.report_file, task)
                _write_report_file(test_results, values.report_file)
                sys.exit(err.returncode)

            _save_report_data(test_results, values.report_file, task)
        _write_report_file(test_results, values.report_file)

    sys.exit(0)
415
416
# Run the tool only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
419