# Copyright (c) 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Bisect repackage tool for Linux.

This script repackages Chrome builds for the manual bisect script.
"""

from __future__ import print_function

from functools import partial
import json
import logging
from multiprocessing import Pool
import optparse
import os
import re
import sys
import tempfile
import threading
import urllib

import bisect_repackage_utils
# This script uses the cloud_storage module, which contains gsutil wrappers.
# cloud_storage is part of the catapult repo, so please make sure catapult
# is checked out before running this script.
_PY_UTILS_PATH = os.path.abspath(os.path.join(
    os.path.dirname(__file__), '..', '..', 'third_party', 'catapult',
    'common', 'py_utils'))
if _PY_UTILS_PATH not in sys.path:
  sys.path.insert(1, _PY_UTILS_PATH)
from py_utils import cloud_storage

# Declares the files required to run the manual bisect script on Chrome
# builds in perf. Binary files that should be stripped to reduce zip file
# size are declared in CHROME_STRIP_LIST. The file list was taken from the
# local Chrome executable path. (It can be retrieved by opening
# 'chrome://version' in Chrome and following the executable path.) The list
# needs to be updated if future Chrome versions require additional files.
CHROME_REQUIRED_FILES = {
    'arm': ['apks/'],
    'arm64': ['apks/'],
    'linux': [
        'chrome',
        'chrome_100_percent.pak',
        'chrome_200_percent.pak',
        'chromedriver',
        'crashpad_handler',
        'default_apps/',
        'icudtl.dat',
        'ClearKeyCdm/',
        'WidevineCdm/',
        'locales/',
        'nacl_helper',
        'nacl_helper_bootstrap',
        'nacl_helper_nonsfi',
        'nacl_irt_x86_64.nexe',
        'pnacl/',
        'product_logo_48.png',
        'resources/',
        'resources.pak',
        'v8_context_snapshot.bin',
        'xdg-mime',
        'xdg-settings'
    ],
    'win64': [
        'chrome.dll',
        'chrome.exe',
        'chrome_100_percent.pak',
        'chrome_200_percent.pak',
        'chrome_child.dll',
        'chrome_elf.dll',
        'chrome_watcher.dll',
        'chromedriver.exe',
        'default_apps',
        'd3dcompiler_47.dll',
        'icudtl.dat',
        'libEGL.dll',
        'libGLESv2.dll',
        'locales',
        'nacl_irt_x86_64.nexe',
        'PepperFlash',
        'resources.pak',
        'SecondaryTile.png',
        'v8_context_snapshot.bin'
    ],
    'mac': [
        'chromedriver',
        'Google Chrome.app'
    ]
}

CHROME_WHITELIST_FILES = {
    'win64': r'^\d+\.\d+\.\d+\.\d+\.manifest$',
}
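# The win64 pattern above whitelists version-stamped manifest files that sit
# next to the executable; e.g. a file named '92.0.4515.131.manifest' would
# match (the version number is purely illustrative).
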
# Symbols are not stripped from Android, Windows, or Mac archives.
CHROME_STRIP_LIST = {
    'linux': [
        'chrome',
        'crashpad_handler',
        'nacl_helper'
    ]
}


# APIs to convert between a git hash and a commit position number.
CHROMIUM_GITHASH_TO_SVN_URL = (
    'https://cr-rev.appspot.com/_ah/api/crrev/v1/commit/%s')
CHROMIUM_CP_TO_GITHASH = (
    'https://cr-rev.appspot.com/_ah/api/crrev/v1/redirect/%s')
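# A rough sketch of how these two endpoints are used, inferred from the
# accessors in get_cp_from_hash() and get_hash_from_cp() below (the real
# payloads may carry additional keys):
#   GET .../crrev/v1/commit/<git_hash>  -> {..., "number": "416640", ...}
#   GET .../crrev/v1/redirect/<cp_num>  -> {..., "git_sha": "<40-hex>", ...}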

REVISION_MAP_FILE = 'revision_map.json'

BUILDER_NAME = {
    'arm': 'Android Builder',
    'arm64': 'Android arm64 Builder',
    'linux': 'Linux Builder',
    'mac': 'Mac Builder',
    'win32': 'Win Builder',
    'win64': 'Win x64 Builder'
}

ARCHIVE_PREFIX = {
    'arm': 'full-build-linux',
    'arm64': 'full-build-linux',
    'linux': 'full-build-linux',
    'mac': 'full-build-mac',
    'win32': 'full-build-win32',
    'win64': 'full-build-win32'
}
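# Remote archives are addressed as '<remote_path>/<prefix>_<suffix>.zip' (see
# download_build() below); for example, a Linux build at git hash <hash> is
# expected at 'Linux Builder/full-build-linux_<hash>.zip' in the source
# bucket.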

CHROME_TEST_BUCKET_SUBFOLDER = 'official-by-commit'


class ChromeExecutionError(Exception):
  """Raised when Chrome execution fails."""
  pass


class GitConversionError(Exception):
  """Raised when converting between a git hash and a commit position fails."""
  pass


class PathContext(object):
  """Stores information to repackage builds from one bucket to another.

  A PathContext is used to carry the information used to construct URLs and
  paths when dealing with the storage server and archives.
  """

  def __init__(self, source_bucket, repackage_bucket,
               archive, revision_file=REVISION_MAP_FILE):
    super(PathContext, self).__init__()
    self.archive = archive
    self.builder_name = BUILDER_NAME[archive]
    self.original_gs_bucket = source_bucket
    self.original_remote_path = BUILDER_NAME[archive]
    self.repackage_gs_bucket = repackage_bucket
    self.repackage_remote_path = '%s/%s' % (CHROME_TEST_BUCKET_SUBFOLDER,
                                            BUILDER_NAME[archive])
    self.file_prefix = ARCHIVE_PREFIX[archive]
    self.revision_file = os.path.join(os.getcwd(), revision_file)

  def GetExtractedDir(self):
    # Perf builders archive the binaries in the out/Release directory.
    if self.archive in ['arm', 'arm64']:
      return os.path.join('out', 'Release')
    return self.file_prefix
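
# A minimal usage sketch of PathContext (bucket names here are hypothetical
# placeholders):
#   context = PathContext('chrome-perf', 'chrome-test-builds', 'linux')
#   context.repackage_remote_path  # -> 'official-by-commit/Linux Builder'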


def get_cp_from_hash(git_hash):
  """Converts a git hash to a commit position number."""
  json_url = CHROMIUM_GITHASH_TO_SVN_URL % git_hash
  response = urllib.urlopen(json_url)
  if response.getcode() != 200:
    logging.warning('JSON URL: %s, HTTP status: %s',
                    json_url, response.getcode())
    raise GitConversionError
  try:
    data = json.loads(response.read())
  except Exception as e:
    logging.warning('JSON URL: %s, Error Message: %s', json_url, e)
    raise GitConversionError
  if 'number' in data:
    return data['number']
  logging.warning('JSON URL: %s has no commit position number', json_url)
  raise GitConversionError


def create_cp_from_hash_map(hash_list):
  """Returns a dict mapping commit position numbers to git hashes.

  Hashes whose conversion fails are skipped.
  """
  hash_map = {}
  for git_hash in hash_list:
    try:
      cp_num = get_cp_from_hash(git_hash)
      hash_map[cp_num] = git_hash
    except GitConversionError:
      pass
  return hash_map


def get_list_of_suffix(bucket_address, prefix, filter_function):
  """Gets the list of suffixes of files in a Google Storage bucket.

  Example: a Google Storage bucket containing one file
  'full-build-linux_20983' will return ['20983'] if the prefix is
  given as 'full-build-linux'. A bucket containing multiple files
  will return multiple suffixes.

  Args:
    bucket_address(String): Bucket URL to list files from.
    prefix(String): The prefix used in creating build file names.
    filter_function: A function that returns True if the extracted
      suffix is in the correct format and False otherwise. It allows
      only proper suffixes to be extracted and returned.

  Returns:
    (List) List of proper suffixes in the bucket.
  """
  file_list = cloud_storage.List(bucket_address)
  suffix_list = []
  extract_suffix = r'.*?%s_(.*?)\.zip' % prefix
  for file_name in file_list:
    match = re.match(extract_suffix, file_name)
    if match and filter_function(match.groups()[0]):
      suffix_list.append(match.groups()[0])
  return suffix_list
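
# For example, with hypothetical bucket contents where
# 'chrome-perf/Linux Builder' holds 'full-build-linux_<hash1>.zip' and
# 'full-build-linux_<hash2>.zip':
#   get_list_of_suffix('chrome-perf/Linux Builder', 'full-build-linux',
#                      bisect_repackage_utils.IsGitCommitHash)
# would return ['<hash1>', '<hash2>'].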


def download_build(cp_num, revision_map, zip_file_name, context):
  """Downloads a single build corresponding to cp_num and context."""
  remote_file_path = '%s/%s_%s.zip' % (context.original_remote_path,
                                       context.file_prefix,
                                       revision_map[cp_num])
  try:
    cloud_storage.Get(context.original_gs_bucket,
                      remote_file_path, zip_file_name)
  except Exception as e:
    logging.warning('Failed to download: %s, error: %s', zip_file_name, e)
    return False
  return True


def upload_build(zip_file, context):
  """Uploads a single build in zip_file to the repackage bucket in context."""
  cloud_storage.Insert(
      context.repackage_gs_bucket, context.repackage_remote_path, zip_file)


def download_revision_map(context):
  """Downloads the revision map from the repackage bucket in context."""
  download_file = '%s/%s' % (context.repackage_remote_path, REVISION_MAP_FILE)
  cloud_storage.Get(context.repackage_gs_bucket, download_file,
                    context.revision_file)


def get_revision_map(context):
  """Downloads and returns the revision map from the repackage bucket."""
  bisect_repackage_utils.RemoveFile(context.revision_file)
  download_revision_map(context)
  with open(context.revision_file, 'r') as revision_file:
    revision_map = json.load(revision_file)
  bisect_repackage_utils.RemoveFile(context.revision_file)
  return revision_map
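
# The revision map is a JSON object keyed by commit position and valued by
# git hash, for example (values hypothetical):
#   {"416640": "0a1b2c3d4e5f..."}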


def upload_revision_map(revision_map, context):
  """Uploads the given revision_map to the repackage bucket in context."""
  with open(context.revision_file, 'w') as revision_file:
    json.dump(revision_map, revision_file)
  cloud_storage.Insert(context.repackage_gs_bucket,
                       context.repackage_remote_path,
                       context.revision_file)
  bisect_repackage_utils.RemoveFile(context.revision_file)


def create_upload_revision_map(context):
  """Creates and uploads a map from commit position number to git hash."""
  gs_base_url = '%s/%s' % (context.original_gs_bucket,
                           context.original_remote_path)
  hash_list = get_list_of_suffix(gs_base_url, context.file_prefix,
                                 bisect_repackage_utils.IsGitCommitHash)
  cp_num_to_hash_map = create_cp_from_hash_map(hash_list)
  upload_revision_map(cp_num_to_hash_map, context)


def update_upload_revision_map(context):
  """Updates and uploads the map from commit position number to git hash."""
  gs_base_url = '%s/%s' % (context.original_gs_bucket,
                           context.original_remote_path)
  revision_map = get_revision_map(context)
  hash_list = get_list_of_suffix(gs_base_url, context.file_prefix,
                                 bisect_repackage_utils.IsGitCommitHash)
  hash_list = list(set(hash_list) - set(revision_map.values()))
  cp_num_to_hash_map = create_cp_from_hash_map(hash_list)
  # dict(a.items() + b.items()) is Python 2 only; entries already in
  # revision_map win on key collisions.
  merged_dict = dict(cp_num_to_hash_map.items() + revision_map.items())
  upload_revision_map(merged_dict, context)


def make_lightweight_archive(file_archive, archive_name, files_to_archive,
                             context, staging_dir, ignore_sub_folder):
  """Repackages and strips the archive.

  Repackages and strips according to CHROME_REQUIRED_FILES and
  CHROME_STRIP_LIST.
  """
  strip_list = CHROME_STRIP_LIST.get(context.archive)
  tmp_archive = os.path.join(staging_dir, 'tmp_%s' % archive_name)
  (zip_dir, zip_file) = bisect_repackage_utils.MakeZip(
      tmp_archive, archive_name, files_to_archive, file_archive,
      dir_in_zip=context.GetExtractedDir(),
      raise_error=False, strip_files=strip_list,
      ignore_sub_folder=ignore_sub_folder)
  return (zip_dir, zip_file, tmp_archive)
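
# Note: CHROME_STRIP_LIST only has an entry for 'linux', so for the other
# archive types strip_list above is None; MakeZip is assumed to treat
# strip_files=None as "do not strip anything".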


def remove_created_files_and_path(files, paths):
  """Removes all the files and paths passed in."""
  for file_name in files:
    bisect_repackage_utils.RemoveFile(file_name)
  for path in paths:
    bisect_repackage_utils.RemovePath(path)


def verify_chrome_run(zip_dir):
  """Executes the Chrome binary in zip_dir to verify that it runs.

  Currently this is only supported for Linux Chrome builds.
  Prints an error message if the execution fails for any reason.
  """
  try:
    command = [os.path.join(zip_dir, 'chrome')]
    code = bisect_repackage_utils.RunCommand(command)
    if code != 0:
      raise ChromeExecutionError('An error occurred when executing Chrome')
  except ChromeExecutionError as e:
    print(str(e))


def get_whitelist_files(extracted_folder, archive):
  """Gets all the files and directories matching the whitelisted regex."""
  whitelist_files = []
  all_files = os.listdir(extracted_folder)
  for file_name in all_files:
    if re.match(CHROME_WHITELIST_FILES.get(archive), file_name):
      whitelist_files.append(file_name)
  return whitelist_files


def repackage_single_revision(revision_map, verify_run, staging_dir,
                              context, cp_num):
  """Repackages a single Chrome build for manual bisect."""
  archive_name = '%s_%s' % (context.file_prefix, cp_num)
  file_archive = os.path.join(staging_dir, archive_name)
  zip_file_name = '%s.zip' % file_archive
  if not download_build(cp_num, revision_map, zip_file_name, context):
    return

  extract_dir = os.path.join(staging_dir, archive_name)
  is_android = context.archive in ['arm', 'arm64']
  files_to_include = CHROME_REQUIRED_FILES.get(context.archive)

  dir_path_in_zip = context.GetExtractedDir()
  extract_file_list = []
  # Extract only the required files and directories, but only when there is
  # no whitelist pattern to check files against.
  if not CHROME_WHITELIST_FILES.get(context.archive):
    for f in files_to_include:
      if f.endswith('/'):
        f += '*'
      extract_file_list.append(os.path.join(dir_path_in_zip, f))

  bisect_repackage_utils.ExtractZip(
      zip_file_name, extract_dir, extract_file_list)
  extracted_folder = os.path.join(extract_dir, dir_path_in_zip)

  if CHROME_WHITELIST_FILES.get(context.archive):
    whitelist_files = get_whitelist_files(extracted_folder, context.archive)
    # Build a new list rather than appending with +=, which would mutate the
    # shared list in CHROME_REQUIRED_FILES across revisions.
    files_to_include = files_to_include + whitelist_files

  (zip_dir, zip_file, tmp_archive) = make_lightweight_archive(extracted_folder,
                                                              archive_name,
                                                              files_to_include,
                                                              context,
                                                              staging_dir,
                                                              is_android)

  if verify_run:
    verify_chrome_run(zip_dir)
  upload_build(zip_file, context)
  # Record progress in upload_revs.json when it exists (it is created in
  # overwrite mode). Pool workers share this file, so the read-modify-write
  # below is best effort rather than fully race free.
  if os.path.exists('upload_revs.json'):
    with open('upload_revs.json', 'r+') as rfile:
      update_map = json.load(rfile)
      update_map[str(cp_num)] = 'Done'
      rfile.seek(0)
      json.dump(update_map, rfile)
      rfile.truncate()
  # Remove temporary files created during the repackaging process.
  remove_created_files_and_path([zip_file_name],
                                [zip_dir, extract_dir, tmp_archive])


def repackage_revisions(revisions, revision_map, verify_run, staging_dir,
                        context, quit_event=None, progress_event=None):
  """Repackages all Chrome builds listed in revisions.

  This function fans the calls to 'repackage_single_revision' out to a
  multiprocessing pool.
  """
  p = Pool(3)
  func = partial(repackage_single_revision, revision_map, verify_run,
                 staging_dir, context)
  p.imap(func, revisions)
  p.close()
  p.join()
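
# Note: Pool.imap() yields results lazily, but the close()/join() pair above
# still blocks until every submitted task finishes; an eager
# p.map(func, revisions) would be an equivalent alternative here.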


def get_uploaded_builds(context):
  """Returns the revisions already uploaded to the repackage bucket."""
  gs_base_url = '%s/%s' % (context.repackage_gs_bucket,
                           context.repackage_remote_path)
  return get_list_of_suffix(gs_base_url, context.file_prefix,
                            bisect_repackage_utils.IsCommitPosition)


def get_revisions_to_package(revision_map, context):
  """Returns revisions that need to be repackaged.

  It subtracts revisions that are already packaged from all revisions that
  need to be packaged. The revisions will be sorted in descending order.
  """
  already_packaged = get_uploaded_builds(context)
  not_already_packaged = list(set(revision_map.keys()) - set(already_packaged))
  revisions_to_package = sorted(not_already_packaged, reverse=True)
  return revisions_to_package


def get_hash_from_cp(cp_num):
  """Converts a commit position number to a git hash."""
  json_url = CHROMIUM_CP_TO_GITHASH % cp_num
  response = urllib.urlopen(json_url)
  if response.getcode() == 200:
    try:
      data = json.loads(response.read())
      if 'git_sha' in data:
        return data['git_sha']
    except Exception as e:
      logging.warning('Failed to fetch git_hash: %s, error: %s', json_url, e)
  else:
    logging.warning('Failed to fetch git_hash: %s, CP: %s', json_url, cp_num)
  return None


def get_revision_map_for_range(start_rev, end_rev):
  """Builds a commit position to git hash map for the inclusive range."""
  revision_map = {}
  for cp_num in range(start_rev, end_rev + 1):
    git_hash = get_hash_from_cp(cp_num)
    if git_hash:
      revision_map[cp_num] = git_hash
  return revision_map
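
# For example, get_revision_map_for_range(416640, 416642) would return up to
# three entries, skipping any commit position that crrev cannot resolve.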


def get_overwrite_revisions(revision_map):
  """Returns all commit positions in revision_map, newest first."""
  return sorted(revision_map.keys(), reverse=True)


class RepackageJob(object):

  def __init__(self, name, revisions_to_package, revision_map, verify_run,
               staging_dir, context):
    super(RepackageJob, self).__init__()
    self.name = name
    self.revisions_to_package = revisions_to_package
    self.revision_map = revision_map
    self.verify_run = verify_run
    self.staging_dir = staging_dir
    self.context = context
    self.quit_event = threading.Event()
    self.progress_event = threading.Event()
    self.thread = None

  def Start(self):
    """Starts the repackaging in a worker thread."""
    fetchargs = (self.revisions_to_package,
                 self.revision_map,
                 self.verify_run,
                 self.staging_dir,
                 self.context,
                 self.quit_event,
                 self.progress_event)
    self.thread = threading.Thread(target=repackage_revisions,
                                   name=self.name,
                                   args=fetchargs)
    self.thread.start()

  def Stop(self):
    """Stops the job, which must have been started previously."""
    assert self.thread, 'RepackageJob must be started before Stop is called.'
    self.quit_event.set()
    self.thread.join()

  def WaitFor(self):
    """Waits for the repackaging to complete."""
    assert self.thread, ('RepackageJob must be started before WaitFor is '
                         'called.')
    self.progress_event.set()  # Display progress of the job.
    self.thread.join()


def main(argv):
  option_parser = optparse.OptionParser()

  choices = ['mac', 'win32', 'win64', 'linux', 'arm', 'arm64']

  option_parser.add_option('-a', '--archive',
                           choices=choices,
                           help='Builders to repackage from [%s].' %
                           '|'.join(choices))

  # Verifies that the Chrome executable runs.
  option_parser.add_option('-v', '--verify',
                           action='store_true',
                           help='Verifies that Chrome executes normally '
                                'without errors')

  # This option updates the revision map.
  option_parser.add_option('-u', '--update',
                           action='store_true',
                           help='Updates the list of revisions to repackage')

  # This option creates the revision map.
  option_parser.add_option('-c', '--create',
                           action='store_true',
                           help='Creates the list of revisions to repackage')

  # Original bucket that contains perf builds.
  option_parser.add_option('-o', '--original',
                           type='str',
                           help='Google storage bucket name containing '
                                'original Chrome builds')

  # Bucket that should archive lightweight perf builds.
  option_parser.add_option('-r', '--repackage',
                           type='str',
                           help='Google storage bucket name '
                                'to re-archive Chrome builds')

  # Overwrites build archives for a given range.
  option_parser.add_option('-w', '--overwrite',
                           action='store_true',
                           dest='overwrite',
                           help='Overwrite build archives')

  # Start revision for build overwrite.
  option_parser.add_option('-s', '--start_rev',
                           type='str',
                           dest='start_rev',
                           help='Start revision for overwrite')

  # End revision for build overwrite.
  option_parser.add_option('-e', '--end_rev',
                           type='str',
                           dest='end_rev',
                           help='End revision for overwrite')
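
  # Example invocation (bucket names here are hypothetical and the script
  # name is assumed to match this file):
  #   python bisect_repackage.py -a linux -o chrome-perf \
  #       -r chrome-test-builds --update --verify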

  verify_run = False
  (opts, args) = option_parser.parse_args()
  if opts.archive is None:
    print('Error: missing required parameter: --archive')
    option_parser.print_help()
    return 1
  if not opts.original or not opts.repackage:
    raise ValueError('Need to specify the original gs bucket url and '
                     'the repackage gs bucket url')
  context = PathContext(opts.original, opts.repackage, opts.archive)

  if opts.create:
    create_upload_revision_map(context)

  if opts.update:
    update_upload_revision_map(context)

  if opts.verify:
    verify_run = True

  if opts.overwrite:
    if not opts.start_rev or not opts.end_rev:
      raise ValueError('Need to specify overwrite range start (-s) and end '
                       '(-e) revision.')
    revision_map = get_revision_map_for_range(
        int(opts.start_rev), int(opts.end_rev))
    backward_rev = get_overwrite_revisions(revision_map)
    with open('upload_revs.json', 'w') as revision_file:
      json.dump(revision_map, revision_file)
  else:
    revision_map = get_revision_map(context)
    backward_rev = get_revisions_to_package(revision_map, context)

  base_dir = os.path.join('.', context.archive)
  # Clears any leftover staging directories and creates a fresh one.
  bisect_repackage_utils.RemovePath(base_dir)
  bisect_repackage_utils.MaybeMakeDirectory(base_dir)
  staging_dir = os.path.abspath(tempfile.mkdtemp(prefix='staging',
                                                 dir=base_dir))
  repackage = RepackageJob('backward_fetch', backward_rev, revision_map,
                           verify_run, staging_dir, context)
  # A single job thread is used here (plus the small multiprocessing pool
  # inside repackage_revisions); more jobs could run in parallel if the
  # repackaging needs to be quicker.
  try:
    repackage.Start()
    repackage.WaitFor()
  except (KeyboardInterrupt, SystemExit):
    pass
  finally:
    print('Cleaning up...')
    bisect_repackage_utils.RemovePath(staging_dir)


if '__main__' == __name__:
  sys.exit(main(sys.argv))