1#!/usr/bin/env vpython
2#
3# [VPYTHON:BEGIN]
4# wheel: <
5#   name: "infra/python/wheels/google-auth-py2_py3"
6#   version: "version:1.2.1"
7# >
8#
9# wheel: <
10#   name: "infra/python/wheels/pyasn1-py2_py3"
11#   version: "version:0.4.5"
12# >
13#
14# wheel: <
15#   name: "infra/python/wheels/pyasn1_modules-py2_py3"
16#   version: "version:0.2.4"
17# >
18#
19# wheel: <
20#   name: "infra/python/wheels/six"
21#   version: "version:1.10.0"
22# >
23#
24# wheel: <
25#   name: "infra/python/wheels/cachetools-py2_py3"
26#   version: "version:2.0.1"
27# >
28# wheel: <
29#   name: "infra/python/wheels/rsa-py2_py3"
30#   version: "version:4.0"
31# >
32#
33# wheel: <
34#   name: "infra/python/wheels/requests"
35#   version: "version:2.13.0"
36# >
37#
38# wheel: <
39#   name: "infra/python/wheels/google-api-python-client-py2_py3"
40#   version: "version:1.6.2"
41# >
42#
43# wheel: <
44#   name: "infra/python/wheels/httplib2-py2_py3"
45#   version: "version:0.12.1"
46# >
47#
48# wheel: <
49#   name: "infra/python/wheels/oauth2client-py2_py3"
50#   version: "version:3.0.0"
51# >
52#
53# wheel: <
54#   name: "infra/python/wheels/uritemplate-py2_py3"
55#   version: "version:3.0.0"
56# >
57#
58# wheel: <
59#   name: "infra/python/wheels/google-auth-oauthlib-py2_py3"
60#   version: "version:0.3.0"
61# >
62#
63# wheel: <
64#   name: "infra/python/wheels/requests-oauthlib-py2_py3"
65#   version: "version:1.2.0"
66# >
67#
68# wheel: <
69#   name: "infra/python/wheels/oauthlib-py2_py3"
70#   version: "version:3.0.1"
71# >
72#
73# wheel: <
74#   name: "infra/python/wheels/google-auth-httplib2-py2_py3"
75#   version: "version:0.0.3"
76# >
77# [VPYTHON:END]
78#
79# Copyright 2019 The ANGLE Project Authors. All rights reserved.
80# Use of this source code is governed by a BSD-style license that can be
81# found in the LICENSE file.
82#
83# generate_deqp_stats.py:
84#   Checks output of deqp testers and generates stats using the GDocs API
85#
# prerequisites:
#   https://developers.google.com/sheets/api/quickstart/python
88#   Follow the quickstart guide.
89#
90# usage: generate_deqp_stats.py [-h] [--auth_path [AUTH_PATH]] [--spreadsheet [SPREADSHEET]]
91#                               [--verbosity [VERBOSITY]]
92#
93# optional arguments:
94#   -h, --help            show this help message and exit
95#   --auth_path [AUTH_PATH]
96#                         path to directory containing authorization data (credentials.json and
97#                         token.pickle). [default=<home>/.auth]
98#   --spreadsheet [SPREADSHEET]
99#                         ID of the spreadsheet to write stats to. [default
100#                         ='1D6Yh7dAPP-aYLbX3HHQD8WubJV9XPuxvkKowmn2qhIw']
101#   --verbosity [VERBOSITY]
102#                         Verbosity of output. Valid options are [DEBUG, INFO, WARNING, ERROR].
103#                         [default=INFO]
104
105import argparse
106import datetime
107import logging
108import os
109import pickle
110import re
111import subprocess
112import sys
113import urllib
114from google.auth.transport.requests import Request
115from googleapiclient.discovery import build
116from google_auth_oauthlib.flow import InstalledAppFlow
117
118####################
119# Global Constants #
120####################
121
122HOME_DIR = os.path.expanduser('~')
123SCRIPT_DIR = sys.path[0]
124ROOT_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, '..'))
125
126LOGGER = logging.getLogger('generate_stats')
127
128SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
129
130BOT_NAMES = [
131    'Win10 FYI x64 dEQP Release (NVIDIA)',
132    'Win10 FYI x64 dEQP Release (Intel HD 630)',
133    'Win7 FYI dEQP Release (AMD)',
134    'Win7 FYI x64 dEQP Release (NVIDIA)',
135    'Mac FYI dEQP Release Intel',
136    'Mac FYI dEQP Release AMD',
137    'Linux FYI dEQP Release (Intel HD 630)',
138    'Linux FYI dEQP Release (NVIDIA)',
139    'Android FYI dEQP Release (Nexus 5X)',
140    'Android FYI 32 dEQP Vk Release (Pixel 2)',
141    'Android FYI 64 dEQP Vk Release (Pixel 2)',
142]
143BOT_NAME_PREFIX = 'chromium/ci/'
144BUILD_LINK_PREFIX = 'https://ci.chromium.org/p/chromium/builders/ci/'
145
146REQUIRED_COLUMNS = ['build_link', 'time', 'date', 'revision', 'angle_revision', 'duplicate']
147MAIN_RESULT_COLUMNS = ['Passed', 'Failed', 'Skipped', 'Not Supported', 'Exception', 'Crashed']
148
149INFO_TAG = '*RESULT'
150
151WORKAROUND_FORMATTING_ERROR_STRING = "Still waiting for the following processes to finish:"
152
153######################
154# Build Info Parsing #
155######################
156
157
158# Returns a struct with info about the latest successful build given a bot name. Info contains the
159# build_name, time, date, angle_revision, and chrome revision.
160# Uses: bb ls '<botname>' -n 1 -status success -p
def get_latest_success_build_info(bot_name):
    """Return a dict describing the latest successful build for bot_name.

    Keys set: 'build_name', 'build_link', 'time', 'date', 'angle_revision',
    'revision' (chromium). Parsed from: bb ls '<bot_name>' -n 1 -status success -p
    Raises ValueError if bb reports an error or its output looks unexpected.
    """
    bb = subprocess.Popen(['bb', 'ls', bot_name, '-n', '1', '-status', 'success', '-p'],
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    LOGGER.debug("Ran [bb ls '" + bot_name + "' -n 1 -status success -p]")
    out, err = bb.communicate()
    if err:
        raise ValueError("Unexpected error from bb ls: '" + err + "'")
    if not out:
        raise ValueError("Unexpected empty result from bb ls of bot '" + bot_name + "'")
    # Example output (line 1):
    # ci.chromium.org/b/8915280275579996928 SUCCESS   'chromium/ci/Win10 FYI dEQP Release (NVIDIA)/26877'
    # ...
    if 'SUCCESS' not in out:
        raise ValueError("Unexpected result from bb ls: '" + out + "'")
    info = {}
    for line in out.splitlines():
        # The first line holds the build name (the first single-quoted token)
        if 'build_name' not in info:
            info['build_name'] = line.strip().split("'")[1]
            # Remove the bot name and prepend the build link
            info['build_link'] = BUILD_LINK_PREFIX + urllib.quote(
                info['build_name'].split(BOT_NAME_PREFIX)[1])
        if 'Created' in line:
            # Example output of line with 'Created':
            # ...
            # Created today at 12:26:39, waited 2.056319s, started at 12:26:41, ran for 1h16m48.14963s, ended at 13:43:30
            # ...
            # The first HH:MM:SS timestamp before the first comma is the creation time
            info['time'] = re.findall(r'[0-9]{1,2}:[0-9]{2}:[0-9]{2}', line.split(',', 1)[0])[0]
            # Format today's date in US format so Sheets can read it properly
            info['date'] = datetime.datetime.now().strftime('%m/%d/%y')
        if 'got_angle_revision' in line:
            # Example output of line with angle revision:
            # ...
            #   "parent_got_angle_revision": "8cbd321cafa92ffbf0495e6d0aeb9e1a97940fee",
            # ...
            # Python 2: filter() over a str returns a str, keeping only the hash chars
            info['angle_revision'] = filter(str.isalnum, line.split(':')[1])
        if '"revision"' in line:
            # Example output of line with chromium revision:
            # ...
            #   "revision": "3b68405a27f1f9590f83ae07757589dba862f141",
            # ...
            info['revision'] = filter(str.isalnum, line.split(':')[1])
    if 'build_name' not in info:
        raise ValueError("Could not find build_name from bot '" + bot_name + "'")
    return info
207
208
209# Returns a list of step names that we're interested in given a build name. We are interested in
210# step names starting with 'angle_'. May raise an exception.
211# Uses: bb get '<build_name>' -steps
def get_step_names(build_name):
    """Return the names of all 'angle_*' steps run in the given build.

    Runs: bb get '<build_name>' -steps
    Raises ValueError if bb reports an error.
    """
    bb = subprocess.Popen(['bb', 'get', build_name, '-steps'],
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    LOGGER.debug("Ran [bb get '" + build_name + "' -steps]")
    out, err = bb.communicate()
    if err:
        raise ValueError("Unexpected error from bb get: '" + err + "'")
    # Relevant lines look like:
    #   Step "angle_deqp_egl_vulkan_tests on (nvidia-quadro-p400-win10-stable) ..." SUCCESS ...
    # The step name is the first double-quoted token on the line.
    return [line.split('"')[1] for line in out.splitlines() if 'Step "angle_' in line]
239
240
241# Performs some heuristic validation of the step_info struct returned from a single step log.
242# Returns True if valid, False if invalid. May write to stderr
def validate_step_info(step_info, build_name, step_name):
    """Heuristically validate a parsed step_info dict.

    Returns True if usable, False if empty. Logs a warning (but still returns
    True) when the 'Total' count disagrees with the sum of the result columns.
    """
    print_name = "'" + build_name + "': '" + step_name + "'"
    if not step_info:
        LOGGER.warning('Step info empty for ' + print_name + '\n')
        return False

    if 'Total' in step_info:
        # Cross-check: the individual result counts should add up to 'Total'.
        computed_total = sum(
            int(step_info[column]) for column in MAIN_RESULT_COLUMNS if column in step_info)
        if step_info['Total'] != computed_total:
            LOGGER.warning('Step info does not sum to total for ' + print_name + ' | Total: ' +
                           str(step_info['Total']) + ' - Computed total: ' + str(computed_total) +
                           '\n')
    return True
258
259
260# Returns a struct containing parsed info from a given step log. The info is parsed by looking for
261# lines with the following format in stdout:
262# '[TESTSTATS]: <key>: <value>''
263# May write to stderr
264# Uses: bb log '<build_name>' '<step_name>'
def get_step_info(build_name, step_name):
    """Return a dict of stats parsed from a single step's log, or None on error.

    Stats are lines of the form '*RESULT: <key>: <value>' in stdout of:
    bb log '<build_name>' '<step_name>'
    Integer values for the same key (e.g. across shards) are summed; string
    values are concatenated with newlines, capped below Sheets' cell limit.
    """
    bb = subprocess.Popen(['bb', 'log', build_name, step_name],
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    LOGGER.debug("Ran [bb log '" + build_name + "' '" + step_name + "']")
    out, err = bb.communicate()
    if err:
        LOGGER.warning("Unexpected error from bb log '" + build_name + "' '" + step_name + "': '" +
                       err + "'")
        return None
    step_info = {}
    # Example output (relevant lines of stdout):
    # ...
    # *RESULT: Total: 155
    # *RESULT: Passed: 11
    # *RESULT: Failed: 0
    # *RESULT: Skipped: 12
    # *RESULT: Not Supported: 132
    # *RESULT: Exception: 0
    # *RESULT: Crashed: 0
    # *RESULT: Unexpected Passed: 12
    # ...
    append_errors = []
    # Hacky workaround to fix issue where messages are dropped into the middle of lines by another
    # process:
    # eg.
    # *RESULT: <start_of_result>Still waiting for the following processes to finish:
    # "c:\b\s\w\ir\out\Release\angle_deqp_gles3_tests.exe" ...
    # <end_of_result>
    #
    # Removes the message and skips the line following it, and then appends the <start_of_result>
    # and <end_of_result> back together
    workaround_prev_line = ""
    workaround_prev_line_count = 0
    for line in out.splitlines():
        # Skip lines if the workaround still has lines to skip
        if workaround_prev_line_count > 0:
            workaround_prev_line_count -= 1
            continue
        # If there are no more lines to skip and there is a previous <start_of_result> to append,
        # append it and finish the workaround
        elif workaround_prev_line != "":
            line = workaround_prev_line + line
            workaround_prev_line = ""
            workaround_prev_line_count = 0
            LOGGER.debug("Formatting error workaround rebuilt line as: '" + line + "'\n")

        if INFO_TAG not in line:
            continue

        # When the workaround string is detected, start the workaround with 1 line to skip and save
        # the <start_of_result>, but continue the loop until the workaround is finished
        if WORKAROUND_FORMATTING_ERROR_STRING in line:
            workaround_prev_line = line.split(WORKAROUND_FORMATTING_ERROR_STRING)[0]
            workaround_prev_line_count = 1
            continue

        line_columns = line.split(INFO_TAG, 1)[1].split(':')
        # Bug fix: '!=' instead of 'is not' -- 'is not' compares identity, which
        # only happened to work for small ints in CPython.
        if len(line_columns) != 3:
            LOGGER.warning("Line improperly formatted: '" + line + "'\n")
            continue
        key = line_columns[1].strip()
        # If the value is clearly an int, sum it. Otherwise, concatenate it as a string
        try:
            int_val = int(line_columns[2])
        except ValueError:
            int_val = None

        if int_val is not None:
            if key not in step_info:
                step_info[key] = 0
            step_info[key] += int_val
        else:
            if key not in step_info:
                step_info[key] = line_columns[2].strip()
            else:
                append_string = '\n' + line_columns[2].strip()
                # Sheets has a limit of 50000 characters per cell, so make sure to stop appending
                # below this limit
                if len(step_info[key]) + len(append_string) < 50000:
                    step_info[key] += append_string
                else:
                    if key not in append_errors:
                        append_errors.append(key)
                        LOGGER.warning("Too many characters in column '" + key +
                                       "'. Output capped.")
    return step_info
357
358
359# Returns the info for each step run on a given bot_name.
def get_bot_info(bot_name):
    """Return the build info for bot_name plus parsed stats for each valid step.

    The returned dict contains the build-level keys from
    get_latest_success_build_info(), a 'step_names' list of steps whose logs
    parsed and validated successfully, and one entry per valid step name.
    """
    info = get_latest_success_build_info(bot_name)
    info['step_names'] = get_step_names(info['build_name'])
    # Bug fix: the original removed invalid steps from the list *while
    # iterating it*, which silently skipped the step following each removal.
    # Build the list of valid steps separately instead.
    valid_step_names = []
    for step_name in info['step_names']:
        LOGGER.info("Parsing step '" + step_name + "'...")
        step_info = get_step_info(info['build_name'], step_name)
        if validate_step_info(step_info, info['build_name'], step_name):
            info[step_name] = step_info
            valid_step_names.append(step_name)
    info['step_names'] = valid_step_names
    return info
371
372
373#####################
374# Sheets Formatting #
375#####################
376
377
378# Get an individual spreadsheet based on the spreadsheet id. Returns the result of
379# spreadsheets.get(), or throws an exception if the sheet could not open.
def get_spreadsheet(service, spreadsheet_id):
    """Fetch a spreadsheet object via spreadsheets.get().

    Raises an Exception if the spreadsheet could not be opened.
    """
    LOGGER.debug("Called [spreadsheets.get(spreadsheetId='" + spreadsheet_id + "')]")
    spreadsheet = service.get(spreadsheetId=spreadsheet_id).execute()
    if not spreadsheet:
        raise Exception("Did not open spreadsheet '" + spreadsheet_id + "'")
    return spreadsheet
387
388
389# Returns a nicely formatted string based on the bot_name and step_name
def format_sheet_name(bot_name, step_name):
    """Build a readable sheet name from a bot name and a step name.

    Step names look like 'angle_deqp_<frontend>_<backend>_tests'; the result
    is '<frontend> <backend> <remaining tokens> <cleaned bot name>'.
    """
    # Drop tokens that add no information to the sheet name.
    for noise in ('FYI', 'Release', 'Vk', 'dEQP', '(', ')'):
        bot_name = bot_name.replace(noise, '')
    bot_name = ' '.join(bot_name.split())  # Collapse repeated spaces
    # Isolate the test name from the rest of the step line.
    step_name = re.findall(r'angle\w*', step_name)[0]
    pretty = ''
    # (needle to look for, needle to remove, label) -- frontends first so they
    # appear before backends in the formatted name. Note the '_d3d11' entry
    # intentionally tests without the trailing underscore, as the original did.
    replacements = (
        ('_egl_', '_egl_', ' EGL'),
        ('_gles2_', '_gles2_', ' GLES 2.0 '),
        ('_gles3_', '_gles3_', ' GLES 3.0 '),
        ('_gles31_', '_gles31_', ' GLES 3.1 '),
        ('_d3d9_', '_d3d9_', ' D3D9 '),
        ('_d3d11', '_d3d11_', ' D3D11 '),
        ('_gl_', '_gl_', ' Desktop OpenGL '),
        ('_gles_', '_gles_', ' OpenGLES '),
        ('_vulkan_', '_vulkan_', ' Vulkan '),
    )
    for needle, target, label in replacements:
        if needle in step_name:
            step_name = step_name.replace(target, '_')
            pretty += label
    # Fold any leftover step-name tokens into the formatted name.
    for leftover in ('angle_', '_deqp_', '_tests'):
        step_name = step_name.replace(leftover, '_')
    pretty += ' ' + step_name.replace('_', ' ').strip()
    pretty = ' '.join(pretty.split())  # Collapse repeated spaces
    return pretty + ' ' + bot_name
436
437
438# Returns the full list of sheet names that should be populated based on the info struct
def get_sheet_names(info):
    """Return every sheet name implied by the info struct, one per bot/step pair."""
    return [
        format_sheet_name(bot_name, step_name)
        for bot_name in info
        for step_name in info[bot_name]['step_names']
    ]
446
447
448# Returns True if the sheet is found in the spreadsheets object
def sheet_exists(spreadsheet, step_name):
    """Return True if a sheet titled step_name exists in the spreadsheet object."""
    return any(
        sheet['properties']['title'] == step_name for sheet in spreadsheet['sheets'])
454
455
456# Validates the spreadsheets object against the list of sheet names which should appear. Returns a
457# list of sheets that need creation.
def validate_sheets(spreadsheet, sheet_names):
    """Return the subset of sheet_names not yet present in the spreadsheet.

    These are the sheets that still need to be created.
    """
    return [name for name in sheet_names if not sheet_exists(spreadsheet, name)]
464
465
466# Performs a batch update with a given service, spreadsheet id, and list <object(Request)> of
467# updates to do.
def batch_update(service, spreadsheet_id, updates):
    """Run spreadsheets.batchUpdate() with the given list <object(Request)> of updates."""
    body = {
        'requests': updates,
    }
    LOGGER.debug("Called [spreadsheets.batchUpdate(spreadsheetId='" + spreadsheet_id + "', body=" +
                 str(body) + ')]')
    service.batchUpdate(spreadsheetId=spreadsheet_id, body=body).execute()
476
477
478# Creates sheets given a service and spreadsheed id based on a list of sheet names input
def create_sheets(service, spreadsheet_id, sheet_names):
    """Create one new sheet per entry in sheet_names."""
    updates = []
    for sheet_name in sheet_names:
        updates.append({'addSheet': {'properties': {'title': sheet_name,}}})
    batch_update(service, spreadsheet_id, updates)
482
483
484# Calls a values().batchGet() on the service to find the list of column names from each sheet in
485# sheet_names. Returns a dictionary with one list per sheet_name.
def get_headers(service, spreadsheet_id, sheet_names):
    """Fetch the header row of each named sheet via values().batchGet().

    Returns a dict mapping sheet_name -> list of header strings (empty list
    for a sheet with no data yet).
    """
    header_ranges = [sheet_name + '!A1:Z' for sheet_name in sheet_names]
    LOGGER.debug("Called [spreadsheets.values().batchGet(spreadsheetId='" + spreadsheet_id +
                 ', ranges=' + str(header_ranges) + "')]")
    request = service.values().batchGet(spreadsheetId=spreadsheet_id, ranges=header_ranges)
    response = request.execute()
    headers = {}
    # valueRanges come back in the same order as the requested ranges.
    for sheet_name, value_range in zip(sheet_names, response['valueRanges']):
        # An empty sheet has no 'values' key at all; otherwise the header row
        # is the first row of values.
        headers[sheet_name] = value_range['values'][0] if 'values' in value_range else []
    return headers
500
501
502# Calls values().batchUpdate() with supplied list of data <object(ValueRange)> to update on the
503# service.
def batch_update_values(service, spreadsheet_id, data):
    """Run values().batchUpdate() with the supplied list of data <object(ValueRange)>."""
    body = {
        'valueInputOption': 'USER_ENTERED',  # Lets Sheets parse dates properly
        'data': data,
    }
    LOGGER.debug("Called [spreadsheets.values().batchUpdate(spreadsheetId='" + spreadsheet_id +
                 "', body=" + str(body) + ')]')
    service.values().batchUpdate(spreadsheetId=spreadsheet_id, body=body).execute()
514
515
516# Get the sheetId of a sheet based on its name
def get_sheet_id(spreadsheet, sheet_name):
    """Return the sheetId of the sheet titled sheet_name, or -1 if not found."""
    matching_ids = (
        sheet['properties']['sheetId']
        for sheet in spreadsheet['sheets']
        if sheet['properties']['title'] == sheet_name)
    return next(matching_ids, -1)
522
523
524# Update the filters on sheets with a 'duplicate' column. Filter out any duplicate rows
def update_filters(service, spreadsheet_id, headers, info, spreadsheet):
    """Set a basic filter on every sheet that has a 'duplicate' column.

    The filter sorts rows ascending by date and hides rows whose 'duplicate'
    column is 1 (true), so only the first occurrence of identical results shows.
    """
    updates = []
    for bot_name in info:
        for step_name in info[bot_name]['step_names']:
            sheet_name = format_sheet_name(bot_name, step_name)
            header_row = headers[sheet_name]
            if 'duplicate' not in header_row:
                continue
            sheet_id = get_sheet_id(spreadsheet, sheet_name)
            if sheet_id <= -1:
                continue
            filter_spec = {
                "range": {
                    "sheetId": sheet_id,
                    "startColumnIndex": 0,
                    "endColumnIndex": len(header_row)
                },
                "sortSpecs": [{
                    "dimensionIndex": header_row.index('date'),
                    "sortOrder": "ASCENDING"
                }],
                "criteria": {
                    str(header_row.index('duplicate')): {
                        "hiddenValues": ["1"]  # Hide rows when duplicate is 1 (true)
                    }
                }
            }
            updates.append({"setBasicFilter": {"filter": filter_spec}})
    if updates:
        LOGGER.info('Updating sheet filters...')
        batch_update(service, spreadsheet_id, updates)
558
559# Populates the headers with any missing/desired rows based on the info struct, and calls
560# batch update to update the corresponding sheets if necessary.
def update_headers(service, spreadsheet_id, headers, info):
    """Ensure each sheet's header row contains every required and seen column.

    Mutates headers in place (appending any missing REQUIRED_COLUMNS entries
    and any keys seen in the step's stats), then pushes the updated header
    rows to the spreadsheet for sheets whose headers changed.
    """
    data = []
    for bot_name in info:
        for step_name in info[bot_name]['step_names']:
            sheet_name = format_sheet_name(bot_name, step_name)
            current = headers[sheet_name]
            # Headers must always contain the required columns, plus every key
            # seen in this step's stats.
            missing = [column for column in REQUIRED_COLUMNS if column not in current]
            missing += [
                key for key in info[bot_name][step_name].keys()
                if key not in current and key not in missing
            ]
            if not missing:
                continue
            # Update the Gdoc headers for this sheet. (The original also
            # accumulated a 'sheet_names' list here that was never used.)
            current.extend(missing)
            data.append({
                'range': sheet_name + '!A1:Z',
                'majorDimension': 'ROWS',
                'values': [current]
            })
    if data:
        LOGGER.info('Updating sheet headers...')
        batch_update_values(service, spreadsheet_id, data)
590
591
592# Calls values().append() to append a list of values to a given sheet.
def append_values(service, spreadsheet_id, sheet_name, values):
    """Append a single row of values to the bottom of the named sheet."""
    target_range = sheet_name + '!A1:Z'
    insert_data_option = 'INSERT_ROWS'
    value_input_option = 'USER_ENTERED'  # Lets Sheets parse dates properly
    body = {
        'range': target_range,
        'majorDimension': 'ROWS',
        'values': [values],
    }
    LOGGER.debug("Called [spreadsheets.values().append(spreadsheetId='" + spreadsheet_id +
                 "', body=" + str(body) + ", range='" + target_range +
                 "', insertDataOption='" + insert_data_option + "', valueInputOption='" +
                 value_input_option + "')]")
    service.values().append(
        spreadsheetId=spreadsheet_id,
        body=body,
        range=target_range,
        insertDataOption=insert_data_option,
        valueInputOption=value_input_option).execute()
613
614
615# Formula to determine whether a row is a duplicate of the previous row based on checking the
616# columns listed in filter_columns.
617# Eg.
618# date | pass | fail
619# Jan 1  100    50
620# Jan 2  100    50
621# Jan 3  99     51
622#
623# If we want to filter based on only the "pass" and "fail" columns, we generate the following
624# formula in the 'duplicate' column: 'IF(B1=B0, IF(C1=C0,1,0) ,0);
625# This formula is recursively generated for each column in filter_columns, using the column
626# position as determined by headers. The formula uses a more generalized form with
627# 'INDIRECT(ADDRESS(<row>, <col>))'' instead of 'B1', where <row> is Row() and Row()-1, and col is
628# determined by the column's position in headers
def generate_duplicate_formula(headers, filter_columns):
    """Recursively build the nested IF() duplicate-detection formula.

    Each column in filter_columns that appears in headers contributes one
    IF(<cell>=<cell above>, ..., 0) layer; the innermost true branch is '1'.
    Columns absent from headers are skipped.
    """
    # Base case: every requested column has been handled.
    if not filter_columns:
        return '1'
    for position, header in enumerate(headers):
        if header != filter_columns[0]:
            continue
        # Sheets columns are 1-based; compare this row's cell to the one above.
        col = str(position + 1)
        this_cell = 'INDIRECT(ADDRESS(ROW(), ' + col + '))'
        prev_cell = 'INDIRECT(ADDRESS(ROW() - 1, ' + col + '))'
        inner = generate_duplicate_formula(headers, filter_columns[1:])
        return 'IF(' + this_cell + '=' + prev_cell + ',' + inner + ',0)'
    # Column not present in headers; drop it and keep recursing.
    return generate_duplicate_formula(headers, filter_columns[1:])
643
644
645# Helper function to start the recursive call to generate_duplicate_formula
def generate_duplicate_formula_helper(headers):
    """Kick off generate_duplicate_formula over the main result columns.

    Returns the formula prefixed with '=', or '' when none of the filter
    columns exist in headers (a bare '1' would mark every row a duplicate).
    """
    formula = generate_duplicate_formula(headers, MAIN_RESULT_COLUMNS)
    return "" if formula == "1" else "=" + formula
654
655# Uses the list of headers and the info struct to come up with a list of values for each step
656# from the latest builds.
def update_values(service, spreadsheet_id, headers, info):
    """Append one row per bot/step to its sheet, built from the info struct.

    Row values are ordered to match each sheet's header row; columns with no
    matching data get an empty string. (Removed an unused 'data' accumulator
    from the original.)
    """
    for bot_name in info:
        for step_name in info[bot_name]['step_names']:
            sheet_name = format_sheet_name(bot_name, step_name)
            values = []
            # For each header column, pick build-level info first (for required
            # columns), then per-step stats, then the generated 'duplicate'
            # formula, else a blank placeholder to keep columns aligned.
            for key in headers[sheet_name]:
                if key in info[bot_name] and key in REQUIRED_COLUMNS:
                    values.append(info[bot_name][key])
                elif key in info[bot_name][step_name]:
                    values.append(info[bot_name][step_name][key])
                elif key == "duplicate" and key in REQUIRED_COLUMNS:
                    values.append(generate_duplicate_formula_helper(headers[sheet_name]))
                else:
                    values.append('')
            LOGGER.info("Appending new rows to sheet '" + sheet_name + "'...")
            try:
                append_values(service, spreadsheet_id, sheet_name, values)
            except Exception as error:
                LOGGER.warning('%s\n' % str(error))
679
680
681# Updates the given spreadsheed_id with the info struct passed in.
def update_spreadsheet(service, spreadsheet_id, info):
    """Push the info struct to the spreadsheet.

    Opens the spreadsheet, creates any missing sheets, syncs header rows and
    duplicate filters, then appends one row of values per bot/step.
    """
    LOGGER.info('Opening spreadsheet...')
    spreadsheet = get_spreadsheet(service, spreadsheet_id)
    LOGGER.info('Parsing sheet names...')
    sheet_names = get_sheet_names(info)
    missing_sheets = validate_sheets(spreadsheet, sheet_names)
    if missing_sheets:
        LOGGER.info('Creating new sheets...')
        create_sheets(service, spreadsheet_id, missing_sheets)
    LOGGER.info('Parsing sheet headers...')
    headers = get_headers(service, spreadsheet_id, sheet_names)
    update_headers(service, spreadsheet_id, headers, info)
    update_filters(service, spreadsheet_id, headers, info, spreadsheet)
    update_values(service, spreadsheet_id, headers, info)
696
697
698#####################
699# Main/helpers      #
700#####################
701
702
703# Loads or creates credentials and connects to the Sheets API. Returns a Spreadsheets object with
704# an open connection.
def get_sheets_service(auth_path):
    """Load or create OAuth credentials and return an open Spreadsheets service.

    Reads credentials.json from auth_path; caches the resulting token in
    token.pickle so later runs skip the browser consent flow. Raises an
    Exception with setup instructions when credentials.json is missing.
    """
    # Use os.path.join instead of string concatenation for path building.
    credentials_path = os.path.join(auth_path, 'credentials.json')
    token_path = os.path.join(auth_path, 'token.pickle')
    creds = None
    if not os.path.exists(auth_path):
        LOGGER.info("Creating auth dir '" + auth_path + "'")
        os.makedirs(auth_path)
    if not os.path.exists(credentials_path):
        raise Exception('Missing credentials.json.\n'
                        'Go to: https://developers.google.com/sheets/api/quickstart/python\n'
                        "Under Step 1, click 'ENABLE THE GOOGLE SHEETS API'\n"
                        "Click 'DOWNLOAD CLIENT CONFIGURATION'\n"
                        'Save to your auth_path (' + auth_path + ') as credentials.json')
    if os.path.exists(token_path):
        # NOTE: token.pickle is trusted local state written by this script;
        # pickle must never be loaded from untrusted input.
        with open(token_path, 'rb') as token:
            creds = pickle.load(token)
            LOGGER.info('Loaded credentials from ' + token_path)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            LOGGER.info('Refreshing credentials...')
            creds.refresh(Request())
        else:
            LOGGER.info('Could not find credentials. Requesting new credentials.')
            flow = InstalledAppFlow.from_client_secrets_file(credentials_path, SCOPES)
            creds = flow.run_local_server()
        # Cache the (possibly refreshed) credentials for the next run.
        with open(token_path, 'wb') as token:
            pickle.dump(creds, token)
    service = build('sheets', 'v4', credentials=creds)
    return service.spreadsheets()
735
736
737# Parse the input to the script
def parse_args():
    """Define and parse this script's command-line arguments."""
    parser = argparse.ArgumentParser(os.path.basename(sys.argv[0]))
    parser.add_argument(
        '--auth_path',
        nargs='?',
        default=HOME_DIR + '/.auth',
        help='path to directory containing authorization data '
        '(credentials.json and token.pickle). '
        '[default=<home>/.auth]')
    parser.add_argument(
        '--spreadsheet',
        nargs='?',
        default='1uttk1z8lJ4ZsUY7wMdFauMzUxb048nh5l52zdrAznek',
        help='ID of the spreadsheet to write stats to. '
        "[default='1uttk1z8lJ4ZsUY7wMdFauMzUxb048nh5l52zdrAznek']")
    parser.add_argument(
        '--verbosity',
        nargs='?',
        default='INFO',
        help='Verbosity of output. Valid options are '
        '[DEBUG, INFO, WARNING, ERROR]. '
        '[default=INFO]')
    return parser.parse_args()
761
762
763# Set up the logging with the right verbosity and output.
def initialize_logging(verbosity):
    """Attach a stderr handler to LOGGER and set its level from verbosity."""
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(fmt='%(levelname)s: %(message)s'))
    LOGGER.addHandler(handler)
    # First matching level name wins; anything unrecognized falls back to INFO.
    for name, level in (('DEBUG', logging.DEBUG), ('INFO', logging.INFO),
                        ('WARNING', logging.WARNING), ('ERROR', logging.ERROR)):
        if name in verbosity:
            LOGGER.setLevel(level=level)
            return
    LOGGER.setLevel(level=logging.INFO)
779
780
def main():
    """Scrape the latest successful run of each bot and upload stats to Sheets."""
    os.chdir(ROOT_DIR)
    args = parse_args()
    verbosity = args.verbosity.strip().upper()
    initialize_logging(verbosity)
    auth_path = args.auth_path.replace('\\', '/')
    try:
        service = get_sheets_service(auth_path)
    except Exception as error:
        LOGGER.error('%s\n' % str(error))
        # Bug fix: use sys.exit() instead of the interactive-only exit()/quit()
        # builtins, which are injected by the site module and may be absent.
        sys.exit(1)

    info = {}
    LOGGER.info('Building info struct...')
    for bot_name in BOT_NAMES:
        LOGGER.info("Parsing bot '" + bot_name + "'...")
        try:
            # A single failing bot shouldn't abort the whole run.
            info[bot_name] = get_bot_info(BOT_NAME_PREFIX + bot_name)
        except Exception as error:
            LOGGER.error('%s\n' % str(error))

    LOGGER.info('Updating sheets...')
    try:
        update_spreadsheet(service, args.spreadsheet, info)
    except Exception as error:
        LOGGER.error('%s\n' % str(error))
        sys.exit(1)

    LOGGER.info('Info was successfully parsed to sheet: https://docs.google.com/spreadsheets/d/' +
                args.spreadsheet)
811
812
813if __name__ == '__main__':
814    sys.exit(main())
815