1#!/usr/bin/env vpython 2# Copyright 2017 The Chromium Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6"""This script updates network traffic annotations sheet. To run the script, you 7should first generate annotations.tsv using traffic_annotation_auditor, and then 8call: 9update_annotations_sheet --config=[settings.json] [path_to_annotations.tsv] 10 11Run update_annotations_sheet --config-help for help on configuration file. 12 13TODO(rhalavati): Add tests. 14""" 15 16from __future__ import print_function 17 18import argparse 19import csv 20import datetime 21import httplib2 22import io 23import json 24import os 25import sys 26 27from apiclient import discovery 28from infra_libs import luci_auth 29from oauth2client import client 30from oauth2client import tools 31from oauth2client.file import Storage 32 33 34class SheetEditor(): 35 """Loads and updates traffic annotation's sheet.""" 36 37 # If modifying these scopes, delete your previously saved credentials. 38 SCOPES = "https://www.googleapis.com/auth/spreadsheets" 39 APPLICATION_NAME = "Chrome Network Traffic Annotations Spreadsheet Updater" 40 41 def __init__(self, spreadsheet_id, annotations_sheet_name, 42 changes_sheet_name, silent_change_columns, 43 last_update_column_name, credentials_file_path, 44 client_secret_file_path, verbose): 45 """ Initializes the SheetEditor. Please refer to 'PrintConfigHelp' function 46 for description of input arguments. 47 48 Args: 49 spreadsheet_id: str 50 ID of annotations spreadsheet. 51 annotations_sheet_name: str 52 Name of the sheet that contains the annotations. 53 changes_sheet_name: str 54 Name of the sheet that contains the changes stats. 55 silent_change_columns: list of str 56 List of the columns whose changes are not reported in the stats. 57 last_update_column_name: str 58 Header of the column that keeps the latest update date. 59 credentials_file_path: str 60 Absolute path to read/save user credentials. 61 client_secret_file_path: str 62 Absolute path to read client_secret.json. 63 verbose: bool 64 Flag requesting dump of details of actions. 65 """ 66 self.service = self._InitializeService( 67 self._GetCredentials(credentials_file_path, client_secret_file_path)) 68 self.spreadsheet_id = spreadsheet_id 69 self.annotations_sheet_name = annotations_sheet_name 70 self.changes_sheet_name = changes_sheet_name 71 self.silent_change_columns = silent_change_columns 72 self.last_update_column_name = last_update_column_name 73 self.annotations_sheet_id = self._GetAnnotationsSheetId() 74 self.required_row_updates = [] 75 self.required_cell_updates = [] 76 self.delete_count = 0 77 self.insert_count = 0 78 self.update_count = 0 79 self.verbose = verbose 80 self.today = datetime.datetime.now().strftime("%m/%d/%Y") 81 82 83 def _InitializeService(self, credentials): 84 """ Initializes the Google Sheets API service. 85 86 Args: 87 credentials: OAuth2Credentials user credentials. 88 89 Returns: 90 googleapiclient.discovery.Resource Spreadsheet API service. 91 """ 92 http = credentials.authorize(httplib2.Http()) 93 discoveryUrl = ("https://sheets.googleapis.com/$discovery/rest?version=v4") 94 return discovery.build("sheets", "v4", http=http, 95 discoveryServiceUrl=discoveryUrl) 96 97 98 def _GetCredentials(self, credentials_file_path, client_secret_file_path): 99 """ Gets valid user credentials from storage. If nothing has been stored, or 100 if the stored credentials are invalid, the OAuth2 flow is completed to 101 obtain the new credentials. 102 103 When running in the buildbot, uses LUCI credentials instead. 104 105 Args: 106 credentials_file_path: str Absolute path to read/save user credentials. 107 client_secret_file_path: str Absolute path to read client_secret.json. 108 109 Returns: 110 OAuth2Credentials The obtained user credentials. 111 """ 112 if luci_auth.available(): 113 return luci_auth.LUCICredentials(scopes=[self.SCOPES]) 114 115 store = Storage(credentials_file_path) 116 credentials = store.get() 117 if not credentials or credentials.invalid: 118 flow = client.flow_from_clientsecrets(client_secret_file_path, 119 self.SCOPES) 120 flow.user_agent = self.APPLICATION_NAME 121 flags = tools.argparser.parse_args([]) 122 credentials = tools.run_flow(flow, store, flags) 123 print("Storing credentials to " + credentials_file_path) 124 return credentials 125 126 127 def _GetAnnotationsSheetId(self): 128 """ Gets the id of the sheet containing annotations table. 129 130 Returns: 131 int Id of the sheet. 132 """ 133 response = self.service.spreadsheets().get( 134 spreadsheetId=self.spreadsheet_id, 135 ranges=self.annotations_sheet_name, 136 includeGridData=False).execute() 137 return response["sheets"][0]["properties"]["sheetId"] 138 139 140 def LoadAnnotationsSheet(self): 141 """ Loads the sheet's content. 142 143 Returns: 144 list of list Table of annotations loaded from the trix. 145 """ 146 result = self.service.spreadsheets().values().get( 147 spreadsheetId=self.spreadsheet_id, 148 range=self.annotations_sheet_name).execute() 149 return result.get("values", []) 150 151 152 def _CreateInsertRequest(self, row): 153 self.required_row_updates.append( 154 { "insertDimension": { 155 "range": { 156 "sheetId": self.annotations_sheet_id, 157 "dimension": "ROWS", 158 "startIndex": row, # 0 index. 159 "endIndex": row + 1 160 } 161 } 162 }) 163 self.insert_count += 1 164 165 166 def _CreateAppendRequest(self, row_count): 167 self.required_row_updates.append( 168 { "appendDimension": { 169 "sheetId": self.annotations_sheet_id, 170 "dimension": "ROWS", 171 "length": row_count 172 } 173 }) 174 self.insert_count += row_count 175 176 177 def _CreateDeleteRequest(self, row): 178 self.required_row_updates.append( 179 { "deleteDimension": { 180 "range": { 181 "sheetId": self.annotations_sheet_id, 182 "dimension": "ROWS", 183 "startIndex": row, 184 "endIndex": row + 1 185 } 186 } 187 }) 188 self.delete_count += 1 189 190 191 def _CreateUpdateRequest(self, row, column, value): 192 # If having more than 26 columns, update cell_name. 193 assert(column < 26) 194 cell_name = "%s%i" % (chr(65 + column), 1 + row) 195 self.required_cell_updates.append( 196 { "range": "%s!%s:%s" % ( 197 self.annotations_sheet_name, cell_name, cell_name), 198 "values": [[value]] }) 199 200 201 def GenerateUpdates(self, file_contents): 202 """ Generates required updates to refresh the sheet, using the input file 203 contents. 204 205 Args: 206 file_contents: list of list Table of annotations read from file. Each item 207 represents one row of the annotation table, and each row is presented 208 as a list of its column values. 209 210 Returns: 211 bool Flag specifying if everything was OK or not. 212 """ 213 sheet_contents = self.LoadAnnotationsSheet() 214 if not sheet_contents: 215 print("Could not read previous content.") 216 return False 217 218 headers = file_contents[0] 219 silent_change_column_indices = [] 220 for title in self.silent_change_columns: 221 if title not in headers: 222 print("ERROR: Could not find %s column." % title) 223 return False 224 silent_change_column_indices.append(headers.index(title)) 225 226 last_update_column = headers.index(self.last_update_column_name) 227 228 # Step 1: Compare old and new contents, generate add/remove requests so that 229 # both contents would become the same size with matching unique ids (at 230 # column 0). 231 # Ignores header row (row 0). 232 old_set = set(row[0] for row in sheet_contents[1:]) 233 new_set = set(row[0] for row in file_contents[1:]) 234 removed_ids = old_set - new_set 235 added_ids = list(new_set - old_set) 236 added_ids.sort() 237 if self.verbose: 238 for id in removed_ids: 239 print("Deleted: %s" % id) 240 for id in added_ids: 241 print("Added: %s" % id) 242 243 empty_row = [''] * len(file_contents[0]) 244 # Skip first row (it's the header row). 245 row = 1 246 while row < len(sheet_contents): 247 row_id = sheet_contents[row][0] 248 # If a row is removed, remove it from previous sheet. 249 if row_id in removed_ids: 250 self._CreateDeleteRequest(row) 251 sheet_contents.pop(row) 252 continue 253 # If there are rows to add, and they should be before current row, insert 254 # an empty row before current row. The empty row will be filled later. 255 if added_ids and added_ids[0] < row_id: 256 self._CreateInsertRequest(row) 257 sheet_contents.insert(row, empty_row[:]) 258 added_ids.pop(0) 259 row += 1 260 261 # If there are still rows to be added, they should come at the end. 262 if added_ids: 263 self._CreateAppendRequest(len(added_ids)) 264 while added_ids: 265 sheet_contents.append(empty_row[:]) 266 added_ids.pop() 267 268 assert(len(file_contents) == len(sheet_contents)) 269 270 # Step 2: Compare cells of old and new contents, issue requests to update 271 # cells with different values. Ignore headers row. 272 for row in range(1, len(file_contents)): 273 file_row = file_contents[row] 274 sheet_row = sheet_contents[row] 275 276 major_update = False 277 for col in range(len(file_row)): 278 # Ignore 'Last Update' column for now. 279 if col == last_update_column: 280 continue 281 if file_row[col] != sheet_row[col]: 282 self._CreateUpdateRequest(row, col, file_row[col]) 283 if self.verbose and sheet_row[0]: 284 print("Updating: %s - %s" % (file_row[0], file_contents[0][col])) 285 if col not in silent_change_column_indices: 286 major_update = True 287 # If there has been a change in a column that is not silently updated, 288 # update the date as well. 289 if major_update: 290 self._CreateUpdateRequest(row, last_update_column, self.today) 291 # If the row is not entirely new, increase the update count. 292 if sheet_row[0]: 293 self.update_count += 1 294 return True 295 296 297 def ApplyUpdates(self): 298 """ Applies the updates stored in |self.required_row_updates| and 299 |self.required_cell_updates| to the sheet. 300 """ 301 # Insert/Remove rows. 302 if self.required_row_updates: 303 self.service.spreadsheets().batchUpdate( 304 spreadsheetId=self.spreadsheet_id, 305 body={"requests": self.required_row_updates}).execute() 306 307 # Refresh Cells. 308 if self.required_cell_updates: 309 batch_update_values_request_body = { 310 "value_input_option": "RAW", 311 "data": self.required_cell_updates 312 } 313 self.service.spreadsheets().values().batchUpdate( 314 spreadsheetId=self.spreadsheet_id, 315 body=batch_update_values_request_body).execute() 316 317 # Add Report Line. 318 # TODO(https://crbug.com/656607): Remove changes stat and related config. 319 if self.insert_count or self.delete_count or self.update_count: 320 value_range = "%s!A1:B1000" % self.changes_sheet_name 321 append_body = { 322 "range": value_range, 323 "majorDimension": "ROWS", 324 "values": [[self.today, self.GiveUpdateSummary()]] 325 } 326 self.service.spreadsheets().values().append( 327 spreadsheetId=self.spreadsheet_id, 328 range=value_range, 329 valueInputOption="RAW", 330 body=append_body).execute() 331 332 333 def GiveUpdateSummary(self): 334 return "New annotations: %s, Modified annotations: %s, " \ 335 "Removed annotations: %s" % ( 336 self.insert_count, self.update_count, self.delete_count) 337 338 339def utf_8_encoder(input_file): 340 for line in input_file: 341 yield line.encode("utf-8") 342 343 344def LoadTSVFile(file_path, verbose): 345 """ Loads annotations TSV file. 346 347 Args: 348 file_path: str Path to the TSV file. 349 verbose: bool Whether to print messages about ignored rows. 350 351 Returns: 352 list of list Table of loaded annotations. 353 """ 354 rows = [] 355 with io.open(file_path, mode="r", encoding="utf-8") as csvfile: 356 # CSV library does not support unicode, so encoding to utf-8 and back. 357 reader = csv.reader(utf_8_encoder(csvfile), delimiter='\t') 358 for row in reader: 359 row = [unicode(col, 'utf-8') for col in row] 360 # If the last column of the file_row is empty, the row belongs to a 361 # platform different from the one that TSV file is generated on, hence it 362 # should be ignored. 363 if row[-1]: 364 rows.append(row) 365 elif verbose: 366 print("Ignored from other platforms: %s" % row[0]) 367 return rows 368 369 370def PrintConfigHelp(): 371 print("The config.json file should have the following items:\n" 372 "spreadsheet_id:\n" 373 " ID of annotations spreadsheet.\n" 374 "annotations_sheet_name:\n" 375 " Name of the sheet that contains the annotations.\n" 376 "changes_sheet_name:\n" 377 " Name of the sheet that contains the changes stats.\n" 378 "silent_change_columns:\n" 379 " List of the columns whose changes are not reported in the stats.\n" 380 "last_update_column_name:\n" 381 " Header of the column that keeps the latest update date.\n" 382 "credentials_file_path:\n" 383 " Absolute path of the file that keeps user credentials.\n" 384 "client_secret_file_path:\n" 385 " Absolute path of the file that keeps client_secret.json. The file\n" 386 " can be created as specified in:\n" 387 " https://developers.google.com/sheets/api/quickstart/python") 388 389 390def main(): 391 parser = argparse.ArgumentParser( 392 description="Network Traffic Annotations Sheet Updater") 393 parser.add_argument( 394 "--config-file", 395 help="Configurations file.") 396 parser.add_argument( 397 "--annotations-file", 398 help="TSV annotations file exported from auditor.") 399 parser.add_argument( 400 '--verbose', action='store_true', 401 help='Reports all updates.') 402 parser.add_argument( 403 '--force', action='store_true', 404 help='Performs all actions without confirmation.') 405 parser.add_argument( 406 '--config-help', action='store_true', 407 help='Shows the configurations help.') 408 args = parser.parse_args() 409 410 if args.config_help: 411 PrintConfigHelp() 412 return 0 413 414 # Load and parse config file. 415 with open(args.config_file) as config_file: 416 config = json.load(config_file) 417 418 # Load and parse annotations file. 419 file_content = LoadTSVFile(args.annotations_file, args.verbose) 420 if not file_content: 421 print("Could not read annotations file.") 422 return -1 423 424 sheet_editor = SheetEditor( 425 spreadsheet_id = config["spreadsheet_id"], 426 annotations_sheet_name = config["annotations_sheet_name"], 427 changes_sheet_name = config["changes_sheet_name"], 428 silent_change_columns = config["silent_change_columns"], 429 last_update_column_name = config["last_update_column_name"], 430 credentials_file_path = config.get("credentials_file_path", None), 431 client_secret_file_path = config.get("client_secret_file_path", None), 432 verbose = args.verbose) 433 if not sheet_editor.GenerateUpdates(file_content): 434 return -1 435 436 if sheet_editor.required_cell_updates or sheet_editor.required_row_updates: 437 print("%s" % sheet_editor.GiveUpdateSummary()) 438 if not args.force: 439 print("Proceed with update?") 440 if raw_input("(Y/n): ").strip().lower() != "y": 441 return -1 442 sheet_editor.ApplyUpdates() 443 print("Updates applied.") 444 else: 445 print("No updates required.") 446 447 return 0 448 449 450if __name__ == "__main__": 451 sys.exit(main()) 452