1# Copyright 2019 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
5"""Contains common helpers for working with Android manifests."""
7import hashlib
8import os
9import re
10import shlex
11import sys
12import xml.dom.minidom as minidom
14from util import build_utils
15from xml.etree import ElementTree
# XML namespace URIs used when reading and writing manifest attributes.
# _RegisterElementTreeNamespaces() registers them with ElementTree under the
# 'android', 'tools' and 'dist' prefixes respectively.
ANDROID_NAMESPACE = 'http://schemas.android.com/apk/res/android'
TOOLS_NAMESPACE = 'http://schemas.android.com/tools'
DIST_NAMESPACE = 'http://schemas.android.com/apk/distribution'
# Absolute path of the AndroidManifest.xml that sits two directories above the
# directory containing this file.
# NOTE(review): the name suggests this manifest is empty -- confirm.
EMPTY_ANDROID_MANIFEST_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'AndroidManifest.xml'))
22# When normalizing for expectation matching, wrap these tags when they are long
23# or else they become very hard to read.
25    '<manifest',
26    '<application',
27    '<activity',
28    '<provider',
29    '<receiver',
30    '<service',
32# Don't wrap lines shorter than this.
# Set to True by the first call to _RegisterElementTreeNamespaces().
_xml_namespace_initialized = False


def _RegisterElementTreeNamespaces():
  """Registers the 'android', 'tools' and 'dist' prefixes with ElementTree.

  Only the first call does any work; later calls return immediately.
  """
  global _xml_namespace_initialized
  if not _xml_namespace_initialized:
    # The flag is flipped before registering, so registration is attempted
    # only once.
    _xml_namespace_initialized = True
    for prefix, uri in (('android', ANDROID_NAMESPACE),
                        ('tools', TOOLS_NAMESPACE),
                        ('dist', DIST_NAMESPACE)):
      ElementTree.register_namespace(prefix, uri)
def ParseManifest(path):
  """Loads an AndroidManifest.xml file with ElementTree.

  The 'android', 'tools' and 'dist' namespace prefixes are registered with
  ElementTree first. When no <application> node is found, an empty one is
  appended to the <manifest> node.

  Args:
    path: Location of the manifest file to parse.

  Returns:
    A (doc, manifest_node, app_node) tuple:
      doc: The parsed ElementTree document.
      manifest_node: The <manifest> element.
      app_node: The <application> element (possibly just created).
  """
  _RegisterElementTreeNamespaces()
  doc = ElementTree.parse(path)
  root = doc.getroot()
  # find() only looks below the root, so a <manifest> root is used directly.
  manifest_node = root if root.tag == 'manifest' else doc.find('manifest')

  app_node = doc.find('application')
  if app_node is None:
    app_node = ElementTree.SubElement(manifest_node, 'application')
  return doc, manifest_node, app_node
def SaveManifest(doc, path):
  """Serializes the root of |doc| as UTF-8 XML and writes it to |path|.

  Args:
    doc: ElementTree document whose root element is serialized.
    path: Destination file; written through build_utils.AtomicOutput.
  """
  with build_utils.AtomicOutput(path) as output:
    xml_bytes = ElementTree.tostring(doc.getroot(), encoding='UTF-8')
    output.write(xml_bytes)
def GetPackage(manifest_node):
  """Returns the 'package' attribute of |manifest_node|.

  The lookup goes through |manifest_node|.get(), which for an ElementTree
  element yields None when the attribute is not set.
  """
  package_name = manifest_node.get('package')
  return package_name
def AssertUsesSdk(manifest_node,
                  min_sdk_version=None,
                  target_sdk_version=None,
                  max_sdk_version=None,
                  fail_if_not_exist=False):
  """Checks the android:*SdkVersion attributes of the <uses-sdk> element.

  For each of min/target/max, the attribute is compared (with ==) against the
  corresponding expected value. A check is skipped when the expected value is
  falsy, and also when the attribute is missing or empty unless
  |fail_if_not_exist| is true. If the manifest has no <uses-sdk> element at
  all, nothing is checked, regardless of |fail_if_not_exist|.

  Args:
    manifest_node: The <manifest> element to inspect.
    min_sdk_version: Expected android:minSdkVersion, or None to skip.
    target_sdk_version: Expected android:targetSdkVersion, or None to skip.
    max_sdk_version: Expected android:maxSdkVersion, or None to skip.
    fail_if_not_exist: Whether a missing attribute with a non-empty expected
      value counts as a failure.

  Raises:
    AssertionError: An attribute differs from its expected value, or is
      missing while |fail_if_not_exist| is true.
  """
  uses_sdk_node = manifest_node.find('./uses-sdk')
  if uses_sdk_node is None:
    return

  expectations = (
      ('min', min_sdk_version),
      ('target', target_sdk_version),
      ('max', max_sdk_version),
  )
  for prefix, sdk_version in expectations:
    value = uses_sdk_node.get('{%s}%sSdkVersion' % (ANDROID_NAMESPACE, prefix))
    if not sdk_version:
      continue
    # Raise explicitly instead of using `assert`, so the checks still run when
    # Python is started with -O.
    if not value:
      if fail_if_not_exist:
        raise AssertionError(
            '%sSdkVersion in Android manifest does not exist but we expect %s' %
            (prefix, sdk_version))
      continue
    if value != sdk_version:
      raise AssertionError(
          '%sSdkVersion in Android manifest is %s but we expect %s' %
          (prefix, value, sdk_version))
def AssertPackage(manifest_node, package):
  """Checks that the manifest package matches |package|.

  Nothing is checked when |package| is None or when the manifest does not set
  a package.

  Args:
    manifest_node: The <manifest> element to inspect.
    package: Expected package name, or None to skip the check.

  Raises:
    AssertionError: Both values are set and they differ.
  """
  package_value = GetPackage(manifest_node)
  if package_value is None or package is None:
    return
  # Raise explicitly instead of using `assert`, so the check still runs when
  # Python is started with -O.
  if package_value != package:
    raise AssertionError(
        'Package in Android manifest is %s but we expect %s' % (package_value,
                                                                package))
def _SortAndStripElementTree(root):
  """Sorts attributes and child nodes of |root| in place, recursively.

  Ordering is alphabetical with two exceptions:
    1) <application> nodes go last (since they are giant).
    2) android:name goes before all other attributes.
  Whitespace-only text of every descendant node is cleared as well.
  """
  android_name = '{%s}name' % ANDROID_NAMESPACE

  def _attr_key(item):
    # ('', '') compares lower than any other (name, value) pair.
    if item[0] == android_name:
      return ('', '')
    return item

  def _node_key(element):
    # 'z' compares greater than the '<' that starts every serialized node.
    if element.tag == 'application':
      return 'z'
    serialized = ElementTree.tostring(element).decode('utf8')
    # ElementTree.tostring adds xmlns declarations for every namespace used by
    # the node or its descendants. Drop them so that a child gaining or losing
    # a namespace usage does not change the sort order.
    return re.sub(r' xmlns:.*?".*?"', '', serialized)

  def _normalize(element):
    # Children are normalized first, so that their serialized form (used as
    # the sort key below) is already stable.
    for sub in element:
      text = sub.text
      if text and text.isspace():
        sub.text = None
      _normalize(sub)

    # Sort attributes (requires Python 3.8+).
    ordered_attrs = sorted(element.attrib.items(), key=_attr_key)
    element.attrib = dict(ordered_attrs)

    # Sort child nodes.
    element[:] = sorted(element, key=_node_key)

  _normalize(root)
def _SplitElement(line):
  """Splits a one-line xml node into ('<tag', ['a="b"', ...], '/>').

  The last item is '/>' for a self-closing node and '>' otherwise.
  """
  assert line.endswith('>'), line
  # Cut off the terminator up front so that it never sticks to the last
  # attribute.
  closer = '/>' if line.endswith('/>') else '>'
  remainder = line[:-len(closer)]

  # shlex avoids having to re-encode &quot; etc., but it drops the quotes
  # around attribute values, so they are put back below.
  tokens = shlex.split(remainder)
  quoted_attrs = []
  for token in tokens[1:]:
    quoted_attrs.append(token.replace('=', '="', 1) + '"')
  return tokens[0], quoted_attrs, closer
def _CreateNodeHash(lines):
  """Computes a short md5-based hash for the first XML node in |lines|.

  Lines after the first are scanned in order. If one of them contains
  android:name=" before any scanned line has contained '>', only that line is
  hashed. Otherwise the hash covers every line up to, but excluding, the first
  line whose '<' is indented no deeper than the '<' of the first line.

  Args:
    lines: List of strings containing pretty-printed XML.

  Returns:
    The first 8 hex characters of the md5 digest, as a string.
  """
  base_indent = lines[0].find('<')
  seen_close = False
  for position, text in enumerate(lines[1:], start=1):
    indent = text.find('<')
    if 0 <= indent <= base_indent:
      # Reached a sibling or the closing tag: hash everything before it.
      tag_lines = lines[:position]
      break
    if not seen_close and 'android:name="' in text:
      # To reduce noise of node tags changing, use android:name as the basis
      # of the hash since names are usually unique.
      tag_lines = [text]
      break
    if '>' in text:
      seen_close = True
  else:
    assert False, 'Did not find end of node:\n' + '\n'.join(lines)

  # Insecure and truncated hash as it only needs to be unique vs. its neighbors.
  payload = '\n'.join(tag_lines).encode('utf8')
  return hashlib.md5(payload).hexdigest()[:8]
def _IsSelfClosing(lines):
  """Given pretty-printed xml, returns whether first node is self-closing.

  The first line containing '>' decides: the node is self-closing when the
  character just before that '>' is '/'.
  """
  for text in lines:
    if '>' not in text:
      continue
    return text[text.index('>') - 1] == '/'
  assert False, 'Did not find end of tag:\n' + '\n'.join(lines)
def _AddDiffTags(lines):
  """Appends '  # DIFF-ANCHOR: <hash>' markers to node lines, in place.

  When multiple identical tags appear sequentially, XML diffs can look like:
    +  </tag>
    +  <tag>
  rather than:
    +  <tag>
    +  </tag>
  To reduce confusion, hashes are added to tags. This also ensures changed
  tags show up with outer <tag> elements rather than showing only changed
  attributes.

  Args:
    lines: List of pretty-printed XML lines; modified in place.
  """
  open_hashes = []
  for index, line in enumerate(lines):
    body = line.lstrip()
    # Only indented lines that start or end a node are of interest.
    if not (line[0] == ' ' and body[0] == '<'):
      continue
    # Self-closing nodes that fit on one line get no anchor.
    if line.endswith('/>'):
      continue
    # Neither does <application>, since its anchor would change with basically
    # any change.
    if body.lstrip('</').startswith('application'):
      continue

    if body[1] == '/':
      # Closing tag (</foo>): reuse the hash of the matching opening tag.
      anchor = open_hashes.pop()
    else:
      remaining = lines[index:]
      anchor = _CreateNodeHash(remaining)
      # Only nodes that get a separate closing tag need to be remembered.
      if not _IsSelfClosing(remaining):
        open_hashes.append(anchor)
    lines[index] = line + '  # DIFF-ANCHOR: {}'.format(anchor)
  assert not open_hashes, 'hash_stack was not empty:\n' + '\n'.join(open_hashes)
def NormalizeManifest(manifest_contents):
  """Returns a normalized, diff-friendly rendering of an Android manifest.

  The following is applied, in order:
    * android:debuggable is removed from <application>.
    * android:version of <uses-static-library>/<static-library> children of
      <application> that also have android:name becomes '$VERSION_NUMBER'.
    * The manifest's package name is replaced with '$PACKAGE' in the attribute
      values of all non-root nodes (skipped when the manifest has no package).
    * Attributes and nodes are sorted by _SortAndStripElementTree().
    * The XML is pretty-printed with two-space indentation, and lines longer
      than _WRAP_LINE_LENGTH that contain one of _WRAP_CANDIDATES are split
      into one attribute per line.
    * DIFF-ANCHOR markers are appended by _AddDiffTags().

  Args:
    manifest_contents: The manifest XML, as accepted by
      ElementTree.fromstring().

  Returns:
    The normalized manifest as a string that ends with a newline.
  """
  _RegisterElementTreeNamespaces()
  # Parsing with ElementTree also drops XML comments.
  root = ElementTree.fromstring(manifest_contents)
  package = GetPackage(root)

  app_node = root.find('application')
  if app_node is not None:
    # android:debuggable is added when !is_official_build. Strip it out to avoid
    # expectation diffs caused by not adding is_official_build. Play store
    # blocks uploading apps with it set, so there's no risk of it slipping in.
    app_node.attrib.pop('{%s}debuggable' % ANDROID_NAMESPACE, None)

    # Trichrome's static library version number is updated daily. To avoid
    # frequent manifest check failures, we remove the exact version number
    # during normalization.
    version_attr = '{%s}version' % ANDROID_NAMESPACE
    name_attr = '{%s}name' % ANDROID_NAMESPACE
    for node in app_node:
      if (node.tag in ('uses-static-library', 'static-library')
          and version_attr in node.attrib and name_attr in node.attrib):
        node.set(version_attr, '$VERSION_NUMBER')

  # We also remove the exact package name (except the one at the root level)
  # to avoid noise during manifest comparison.
  def blur_package_name(node):
    for key, value in list(node.attrib.items()):
      node.set(key, value.replace(package, '$PACKAGE'))

    for child in node:
      blur_package_name(child)

  # We only blur the package names of non-root nodes because they generate a lot
  # of diffs when doing manifest checks for upstream targets. We still want to
  # have 1 piece of package name not blurred just in case the package name is
  # mistakenly changed.
  # Without a non-empty package there is nothing to blur: str.replace() raises
  # TypeError for None and, for '', inserts '$PACKAGE' between all characters.
  if package:
    for child in root:
      blur_package_name(child)

  _SortAndStripElementTree(root)

  # Fix up whitespace/indentation.
  dom = minidom.parseString(ElementTree.tostring(root))
  out_lines = []
  for line in dom.toprettyxml(indent='  ').splitlines():
    if not line or line.isspace():
      continue
    if len(line) > _WRAP_LINE_LENGTH and any(
        tag in line for tag in _WRAP_CANDIDATES):
      indent = ' ' * line.find('<')
      start_tag, attrs, end_tag = _SplitElement(line)
      out_lines.append('{}{}'.format(indent, start_tag))
      out_lines.extend('{}    {}'.format(indent, attr) for attr in attrs)
      out_lines[-1] += '>'
      # Heuristic: Do not allow multi-line tags to be self-closing since these
      # can generally be allowed to have nested elements. When diffing, it adds
      # noise if the base file is self-closing and the non-base file is not
      # self-closing.
      if end_tag == '/>':
        out_lines.append('{}{}>'.format(indent, start_tag.replace('<', '</')))
    else:
      out_lines.append(line)

  # Make output more diff-friendly.
  _AddDiffTags(out_lines)

  return '\n'.join(out_lines) + '\n'