1#!/usr/bin/env vpython
2# Copyright 2017 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""This script updates network traffic annotations sheet. To run the script, you
7should first generate annotations.tsv using traffic_annotation_auditor, and then
8call:
9update_annotations_sheet --config=[settings.json] [path_to_annotations.tsv]
10
11Run update_annotations_sheet --config-help for help on configuration file.
12
13TODO(rhalavati): Add tests.
14"""
15
16from __future__ import print_function
17
18import argparse
19import csv
20import datetime
21import httplib2
22import io
23import json
24import os
25import sys
26
27from apiclient import discovery
28from infra_libs import luci_auth
29from oauth2client import client
30from oauth2client import tools
31from oauth2client.file import Storage
32
33
34class SheetEditor():
35  """Loads and updates traffic annotation's sheet."""
36
37  # If modifying these scopes, delete your previously saved credentials.
38  SCOPES = "https://www.googleapis.com/auth/spreadsheets"
39  APPLICATION_NAME = "Chrome Network Traffic Annotations Spreadsheet Updater"
40
41  def __init__(self, spreadsheet_id, annotations_sheet_name,
42               changes_sheet_name, silent_change_columns,
43               last_update_column_name, credentials_file_path,
44               client_secret_file_path, verbose):
45    """ Initializes the SheetEditor. Please refer to 'PrintConfigHelp' function
46    for description of input arguments.
47
48    Args:
49      spreadsheet_id: str
50          ID of annotations spreadsheet.
51      annotations_sheet_name: str
52          Name of the sheet that contains the annotations.
53      changes_sheet_name: str
54          Name of the sheet that contains the changes stats.
55      silent_change_columns: list of str
56          List of the columns whose changes are not reported in the stats.
57      last_update_column_name: str
58          Header of the column that keeps the latest update date.
59      credentials_file_path: str
60          Absolute path to read/save user credentials.
61      client_secret_file_path: str
62          Absolute path to read client_secret.json.
63      verbose: bool
64          Flag requesting dump of details of actions.
65    """
66    self.service = self._InitializeService(
67        self._GetCredentials(credentials_file_path, client_secret_file_path))
68    self.spreadsheet_id = spreadsheet_id
69    self.annotations_sheet_name = annotations_sheet_name
70    self.changes_sheet_name = changes_sheet_name
71    self.silent_change_columns = silent_change_columns
72    self.last_update_column_name = last_update_column_name
73    self.annotations_sheet_id = self._GetAnnotationsSheetId()
74    self.required_row_updates = []
75    self.required_cell_updates = []
76    self.delete_count = 0
77    self.insert_count = 0
78    self.update_count = 0
79    self.verbose = verbose
80    self.today = datetime.datetime.now().strftime("%m/%d/%Y")
81
82
83  def _InitializeService(self, credentials):
84    """ Initializes the Google Sheets API service.
85
86    Args:
87      credentials: OAuth2Credentials user credentials.
88
89    Returns:
90      googleapiclient.discovery.Resource Spreadsheet API service.
91    """
92    http = credentials.authorize(httplib2.Http())
93    discoveryUrl = ("https://sheets.googleapis.com/$discovery/rest?version=v4")
94    return discovery.build("sheets", "v4", http=http,
95                           discoveryServiceUrl=discoveryUrl)
96
97
98  def _GetCredentials(self, credentials_file_path, client_secret_file_path):
99    """ Gets valid user credentials from storage. If nothing has been stored, or
100    if the stored credentials are invalid, the OAuth2 flow is completed to
101    obtain the new credentials.
102
103    When running in the buildbot, uses LUCI credentials instead.
104
105    Args:
106      credentials_file_path: str Absolute path to read/save user credentials.
107      client_secret_file_path: str Absolute path to read client_secret.json.
108
109    Returns:
110      OAuth2Credentials The obtained user credentials.
111    """
112    if luci_auth.available():
113      return luci_auth.LUCICredentials(scopes=[self.SCOPES])
114
115    store = Storage(credentials_file_path)
116    credentials = store.get()
117    if not credentials or credentials.invalid:
118      flow = client.flow_from_clientsecrets(client_secret_file_path,
119                                            self.SCOPES)
120      flow.user_agent = self.APPLICATION_NAME
121      flags = tools.argparser.parse_args([])
122      credentials = tools.run_flow(flow, store, flags)
123      print("Storing credentials to " + credentials_file_path)
124    return credentials
125
126
127  def _GetAnnotationsSheetId(self):
128    """ Gets the id of the sheet containing annotations table.
129
130    Returns:
131      int Id of the sheet.
132    """
133    response = self.service.spreadsheets().get(
134        spreadsheetId=self.spreadsheet_id,
135        ranges=self.annotations_sheet_name,
136        includeGridData=False).execute()
137    return response["sheets"][0]["properties"]["sheetId"]
138
139
140  def LoadAnnotationsSheet(self):
141    """ Loads the sheet's content.
142
143    Returns:
144      list of list Table of annotations loaded from the trix.
145    """
146    result = self.service.spreadsheets().values().get(
147        spreadsheetId=self.spreadsheet_id,
148        range=self.annotations_sheet_name).execute()
149    return result.get("values", [])
150
151
152  def _CreateInsertRequest(self, row):
153    self.required_row_updates.append(
154        { "insertDimension": {
155            "range": {
156              "sheetId": self.annotations_sheet_id,
157              "dimension": "ROWS",
158              "startIndex": row, # 0 index.
159              "endIndex": row + 1
160            }
161          }
162        })
163    self.insert_count += 1
164
165
166  def _CreateAppendRequest(self, row_count):
167    self.required_row_updates.append(
168        { "appendDimension": {
169            "sheetId": self.annotations_sheet_id,
170            "dimension": "ROWS",
171            "length": row_count
172          }
173        })
174    self.insert_count += row_count
175
176
177  def _CreateDeleteRequest(self, row):
178    self.required_row_updates.append(
179        { "deleteDimension": {
180            "range": {
181              "sheetId": self.annotations_sheet_id,
182              "dimension": "ROWS",
183              "startIndex": row,
184              "endIndex": row + 1
185            }
186          }
187        })
188    self.delete_count += 1
189
190
191  def _CreateUpdateRequest(self, row, column, value):
192    # If having more than 26 columns, update cell_name.
193    assert(column < 26)
194    cell_name = "%s%i" % (chr(65 + column), 1 + row)
195    self.required_cell_updates.append(
196        { "range": "%s!%s:%s" % (
197              self.annotations_sheet_name, cell_name, cell_name),
198          "values": [[value]] })
199
200
201  def GenerateUpdates(self, file_contents):
202    """ Generates required updates to refresh the sheet, using the input file
203    contents.
204
205    Args:
206      file_contents: list of list Table of annotations read from file. Each item
207          represents one row of the annotation table, and each row is presented
208          as a list of its column values.
209
210    Returns:
211      bool Flag specifying if everything was OK or not.
212    """
213    sheet_contents = self.LoadAnnotationsSheet()
214    if not sheet_contents:
215      print("Could not read previous content.")
216      return False
217
218    headers = file_contents[0]
219    silent_change_column_indices = []
220    for title in self.silent_change_columns:
221      if title not in headers:
222        print("ERROR: Could not find %s column." % title)
223        return False
224      silent_change_column_indices.append(headers.index(title))
225
226    last_update_column = headers.index(self.last_update_column_name)
227
228    # Step 1: Compare old and new contents, generate add/remove requests so that
229    # both contents would become the same size with matching unique ids (at
230    # column 0).
231    # Ignores header row (row 0).
232    old_set = set(row[0] for row in sheet_contents[1:])
233    new_set = set(row[0] for row in file_contents[1:])
234    removed_ids = old_set - new_set
235    added_ids = list(new_set - old_set)
236    added_ids.sort()
237    if self.verbose:
238      for id in removed_ids:
239        print("Deleted: %s" % id)
240      for id in added_ids:
241        print("Added: %s" % id)
242
243    empty_row = [''] * len(file_contents[0])
244    # Skip first row (it's the header row).
245    row = 1
246    while row < len(sheet_contents):
247      row_id = sheet_contents[row][0]
248      # If a row is removed, remove it from previous sheet.
249      if row_id in removed_ids:
250        self._CreateDeleteRequest(row)
251        sheet_contents.pop(row)
252        continue
253      # If there are rows to add, and they should be before current row, insert
254      # an empty row before current row. The empty row will be filled later.
255      if added_ids and added_ids[0] < row_id:
256        self._CreateInsertRequest(row)
257        sheet_contents.insert(row, empty_row[:])
258        added_ids.pop(0)
259      row += 1
260
261    # If there are still rows to be added, they should come at the end.
262    if added_ids:
263      self._CreateAppendRequest(len(added_ids))
264      while added_ids:
265        sheet_contents.append(empty_row[:])
266        added_ids.pop()
267
268    assert(len(file_contents) == len(sheet_contents))
269
270    # Step 2: Compare cells of old and new contents, issue requests to update
271    # cells with different values. Ignore headers row.
272    for row in range(1, len(file_contents)):
273      file_row = file_contents[row]
274      sheet_row = sheet_contents[row]
275
276      major_update = False
277      for col in range(len(file_row)):
278        # Ignore 'Last Update' column for now.
279        if col == last_update_column:
280          continue
281        if file_row[col] != sheet_row[col]:
282          self._CreateUpdateRequest(row, col, file_row[col])
283          if self.verbose and sheet_row[0]:
284            print("Updating: %s - %s" % (file_row[0], file_contents[0][col]))
285          if col not in silent_change_column_indices:
286            major_update = True
287      # If there has been a change in a column that is not silently updated,
288      # update the date as well.
289      if major_update:
290        self._CreateUpdateRequest(row, last_update_column, self.today)
291        # If the row is not entirely new, increase the update count.
292        if sheet_row[0]:
293          self.update_count += 1
294    return True
295
296
297  def ApplyUpdates(self):
298    """ Applies the updates stored in |self.required_row_updates| and
299    |self.required_cell_updates| to the sheet.
300    """
301    # Insert/Remove rows.
302    if self.required_row_updates:
303      self.service.spreadsheets().batchUpdate(
304          spreadsheetId=self.spreadsheet_id,
305          body={"requests": self.required_row_updates}).execute()
306
307    # Refresh Cells.
308    if self.required_cell_updates:
309      batch_update_values_request_body = {
310        "value_input_option": "RAW",
311        "data": self.required_cell_updates
312      }
313      self.service.spreadsheets().values().batchUpdate(
314          spreadsheetId=self.spreadsheet_id,
315          body=batch_update_values_request_body).execute()
316
317    # Add Report Line.
318    # TODO(https://crbug.com/656607): Remove changes stat and related config.
319    if self.insert_count or self.delete_count or self.update_count:
320      value_range = "%s!A1:B1000" % self.changes_sheet_name
321      append_body = {
322          "range": value_range,
323          "majorDimension": "ROWS",
324          "values": [[self.today, self.GiveUpdateSummary()]]
325      }
326      self.service.spreadsheets().values().append(
327          spreadsheetId=self.spreadsheet_id,
328          range=value_range,
329          valueInputOption="RAW",
330          body=append_body).execute()
331
332
333  def GiveUpdateSummary(self):
334    return "New annotations: %s, Modified annotations: %s, " \
335           "Removed annotations: %s" % (
336                self.insert_count, self.update_count, self.delete_count)
337
338
339def utf_8_encoder(input_file):
340  for line in input_file:
341    yield line.encode("utf-8")
342
343
344def LoadTSVFile(file_path, verbose):
345  """ Loads annotations TSV file.
346
347  Args:
348    file_path: str Path to the TSV file.
349    verbose: bool Whether to print messages about ignored rows.
350
351  Returns:
352    list of list Table of loaded annotations.
353  """
354  rows = []
355  with io.open(file_path, mode="r", encoding="utf-8") as csvfile:
356    # CSV library does not support unicode, so encoding to utf-8 and back.
357    reader = csv.reader(utf_8_encoder(csvfile), delimiter='\t')
358    for row in reader:
359      row = [unicode(col, 'utf-8') for col in row]
360      # If the last column of the file_row is empty, the row belongs to a
361      # platform different from the one that TSV file is generated on, hence it
362      # should be ignored.
363      if row[-1]:
364        rows.append(row)
365      elif verbose:
366        print("Ignored from other platforms: %s" % row[0])
367  return rows
368
369
370def PrintConfigHelp():
371  print("The config.json file should have the following items:\n"
372        "spreadsheet_id:\n"
373        "  ID of annotations spreadsheet.\n"
374        "annotations_sheet_name:\n"
375        "  Name of the sheet that contains the annotations.\n"
376        "changes_sheet_name:\n"
377        "  Name of the sheet that contains the changes stats.\n"
378        "silent_change_columns:\n"
379        "  List of the columns whose changes are not reported in the stats.\n"
380        "last_update_column_name:\n"
381        "  Header of the column that keeps the latest update date.\n"
382        "credentials_file_path:\n"
383        "  Absolute path of the file that keeps user credentials.\n"
384        "client_secret_file_path:\n"
385        "  Absolute path of the file that keeps client_secret.json. The file\n"
386        "  can be created as specified in:\n"
387        "  https://developers.google.com/sheets/api/quickstart/python")
388
389
390def main():
391  parser = argparse.ArgumentParser(
392      description="Network Traffic Annotations Sheet Updater")
393  parser.add_argument(
394      "--config-file",
395      help="Configurations file.")
396  parser.add_argument(
397      "--annotations-file",
398      help="TSV annotations file exported from auditor.")
399  parser.add_argument(
400      '--verbose', action='store_true',
401      help='Reports all updates.')
402  parser.add_argument(
403      '--force', action='store_true',
404      help='Performs all actions without confirmation.')
405  parser.add_argument(
406      '--config-help', action='store_true',
407      help='Shows the configurations help.')
408  args = parser.parse_args()
409
410  if args.config_help:
411    PrintConfigHelp()
412    return 0
413
414  # Load and parse config file.
415  with open(args.config_file) as config_file:
416    config = json.load(config_file)
417
418  # Load and parse annotations file.
419  file_content = LoadTSVFile(args.annotations_file, args.verbose)
420  if not file_content:
421    print("Could not read annotations file.")
422    return -1
423
424  sheet_editor = SheetEditor(
425      spreadsheet_id = config["spreadsheet_id"],
426      annotations_sheet_name = config["annotations_sheet_name"],
427      changes_sheet_name = config["changes_sheet_name"],
428      silent_change_columns = config["silent_change_columns"],
429      last_update_column_name = config["last_update_column_name"],
430      credentials_file_path = config.get("credentials_file_path", None),
431      client_secret_file_path = config.get("client_secret_file_path", None),
432      verbose = args.verbose)
433  if not sheet_editor.GenerateUpdates(file_content):
434    return -1
435
436  if sheet_editor.required_cell_updates or sheet_editor.required_row_updates:
437    print("%s" % sheet_editor.GiveUpdateSummary())
438    if not args.force:
439      print("Proceed with update?")
440      if raw_input("(Y/n): ").strip().lower() != "y":
441        return -1
442    sheet_editor.ApplyUpdates()
443    print("Updates applied.")
444  else:
445    print("No updates required.")
446
447  return 0
448
449
450if __name__ == "__main__":
451  sys.exit(main())
452