1# Copyright 2019 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Contains common helpers for working with Android manifests."""
6
7import hashlib
8import os
9import re
10import shlex
11import sys
12import xml.dom.minidom as minidom
13
14from util import build_utils
15from xml.etree import ElementTree
16
# XML namespace URIs that appear in Android manifests.
ANDROID_NAMESPACE = 'http://schemas.android.com/apk/res/android'
TOOLS_NAMESPACE = 'http://schemas.android.com/tools'
DIST_NAMESPACE = 'http://schemas.android.com/apk/distribution'

# Absolute path of the AndroidManifest.xml that lives two directories above
# this file (presumably an empty manifest, going by the name).
EMPTY_ANDROID_MANIFEST_PATH = os.path.abspath(
    os.path.join(
        os.path.dirname(__file__), os.pardir, os.pardir,
        'AndroidManifest.xml'))

# Tags that NormalizeManifest() re-flows to one attribute per line when the
# tag's line is long; otherwise expectation files become very hard to read.
_WRAP_CANDIDATES = ('<manifest', '<application', '<activity', '<provider',
                    '<receiver', '<service')

# Lines no longer than this many characters are never wrapped.
_WRAP_LINE_LENGTH = 100

# Becomes True once _RegisterElementTreeNamespaces() has run.
_xml_namespace_initialized = False
36
37
def _RegisterElementTreeNamespaces():
  """Registers the android/tools/dist namespace prefixes with ElementTree.

  The registration is performed at most once; later calls are no-ops guarded by
  the module-level |_xml_namespace_initialized| flag.
  """
  global _xml_namespace_initialized
  if not _xml_namespace_initialized:
    _xml_namespace_initialized = True
    prefix_to_uri = (
        ('android', ANDROID_NAMESPACE),
        ('tools', TOOLS_NAMESPACE),
        ('dist', DIST_NAMESPACE),
    )
    for prefix, uri in prefix_to_uri:
      ElementTree.register_namespace(prefix, uri)
46
47
def ParseManifest(path):
  """Parses an AndroidManifest.xml using ElementTree.

  Registers the 'android', 'tools' and 'dist' namespace prefixes with
  ElementTree and creates an <application> node under <manifest> if one is
  missing.

  Args:
    path: Path of the manifest file to parse.

  Returns tuple of:
    doc: Root xml document.
    manifest_node: the <manifest> node.
    app_node: the <application> node.

  Raises:
    AssertionError: If the document contains no <manifest> node.
  """
  _RegisterElementTreeNamespaces()
  doc = ElementTree.parse(path)
  root = doc.getroot()
  # ElementTree.find does not work if the required tag is the root.
  manifest_node = root if root.tag == 'manifest' else doc.find('manifest')
  assert manifest_node is not None, 'No <manifest> node found in %s' % path

  # Look for <application> under <manifest> itself. Searching from the document
  # root would miss it whenever <manifest> is not the root, and a duplicate
  # <application> would then be appended.
  app_node = manifest_node.find('application')
  if app_node is None:
    app_node = ElementTree.SubElement(manifest_node, 'application')

  return doc, manifest_node, app_node
72
73
def SaveManifest(doc, path):
  """Writes the root of |doc|, serialized with encoding 'UTF-8', to |path|.

  The output file is produced through build_utils.AtomicOutput.
  """
  with build_utils.AtomicOutput(path) as output:
    xml_bytes = ElementTree.tostring(doc.getroot(), encoding='UTF-8')
    output.write(xml_bytes)
77
78
def GetPackage(manifest_node):
  """Returns the 'package' attribute of |manifest_node|, or None if unset."""
  package_name = manifest_node.get('package')
  return package_name
81
82
def AssertUsesSdk(manifest_node,
                  min_sdk_version=None,
                  target_sdk_version=None,
                  max_sdk_version=None,
                  fail_if_not_exist=False):
  """Asserts values of the {min,target,max}SdkVersion attributes of <uses-sdk>.

  Nothing is checked when |manifest_node| has no <uses-sdk> child. Otherwise,
  for each version argument that is set:
    * if the attribute is present, it must equal the passed value;
    * if the attribute is absent, the check is skipped unless
      |fail_if_not_exist| is true, in which case the assertion fails.
  """
  uses_sdk_node = manifest_node.find('./uses-sdk')
  if uses_sdk_node is None:
    return

  expectations = (
      ('min', min_sdk_version),
      ('target', target_sdk_version),
      ('max', max_sdk_version),
  )
  for prefix, expected in expectations:
    attr_name = '{%s}%sSdkVersion' % (ANDROID_NAMESPACE, prefix)
    actual = uses_sdk_node.get(attr_name)
    if expected and not actual:
      # An expectation was given but the manifest does not set the attribute.
      assert not fail_if_not_exist, (
          '%sSdkVersion in Android manifest does not exist but we expect %s' %
          (prefix, expected))
      continue
    if actual and expected:
      assert actual == expected, (
          '%sSdkVersion in Android manifest is %s but we expect %s' %
          (prefix, actual, expected))
110
111
def AssertPackage(manifest_node, package):
  """Asserts that the manifest's package equals |package|.

  The check is skipped when |package| is None or when the manifest does not
  set a package.
  """
  actual = GetPackage(manifest_node)
  if actual is not None and package is not None:
    assert actual == package, (
        'Package in Android manifest is %s but we expect %s' % (actual,
                                                                package))
124
125
def _SortAndStripElementTree(root):
  """Sorts attributes and child nodes of |root| in place, recursively.

  Whitespace-only text of descendant nodes is cleared. Ordering is alphabetical
  with two exceptions:
    1) <application> nodes sort last (since they are giant).
    2) android:name sorts before all other attributes.
  """
  android_name = '{%s}name' % ANDROID_NAMESPACE

  def attribute_key(item):
    # |item| is a (name, value) pair; android:name gets the smallest key.
    if item[0] == android_name:
      return ('', '')
    return item

  def element_key(element):
    if element.tag == 'application':
      return 'z'
    serialized = ElementTree.tostring(element).decode('utf8')
    # ElementTree.tostring inserts namespace declarations for every namespace
    # used by the node or its descendants. Drop them so that a child gaining
    # or losing a namespace usage does not alter the sort order.
    return re.sub(r' xmlns:.*?".*?"', '', serialized)

  def normalize(element):
    # Children are processed first because element_key() serializes each
    # child, including its own attributes and descendants.
    for child in element:
      text = child.text
      if text and text.isspace():
        child.text = None
      normalize(child)

    # Rebuilding the dict in sorted order requires Python 3.8+ for the order
    # to be reflected when serializing.
    ordered_attributes = sorted(element.attrib.items(), key=attribute_key)
    element.attrib = dict(ordered_attributes)

    element[:] = sorted(element, key=element_key)

  normalize(root)
158
159
def _SplitElement(line):
  """Parses a one-line xml node into ('<tag', ['a="b"', ...], '/>').

  Args:
    line: A single line holding one opening (or self-closing) tag. Leading
      indentation is ignored.

  Returns:
    Tuple of (start_tag, attributes, end_tag) where |start_tag| is '<' plus the
    tag name, |attributes| is the list of attribute strings exactly as they
    appear in |line| (quotes and entities such as &quot; untouched), and
    |end_tag| is either '>' or '/>'.
  """
  assert line.endswith('>'), line
  end_tag = '/>' if line.endswith('/>') else '>'
  body = line[:-len(end_tag)]

  # Split on whitespace that is not inside a quoted value. Tokens are kept
  # verbatim, so nothing needs to be re-quoted or re-encoded. Unlike
  # shlex.split(), a backslash is an ordinary character here: shell-style
  # escape processing would collapse '\\' into '\' and would fail on a value
  # that ends with a backslash.
  tokens = re.findall(r'''(?:[^\s"']|"[^"]*"|'[^']*')+''', body)
  start_tag = tokens[0]
  attrs = tokens[1:]

  return start_tag, attrs, end_tag
180
181
def _CreateNodeHash(lines):
  """Computes a short hash (truncated md5) for the first XML node in |lines|.

  The hash is based on the first android:name line found before the opening
  tag is seen to close, or otherwise on every line from the start of the node
  up to (but excluding) the next line whose '<' is at the same or a smaller
  indentation.

  Args:
    lines: List of strings containing pretty-printed XML.

  Returns:
    The first 8 hex characters of the md5 digest, as a string.
  """
  base_indent = lines[0].find('<')
  seen_close = False
  for offset, line in enumerate(lines[1:]):
    indent = line.find('<')
    if -1 < indent <= base_indent:
      # |line| is lines[offset + 1]; it belongs to the next node (or is the
      # closing tag), so the basis stops just before it.
      basis = lines[:offset + 1]
      break
    if not seen_close and 'android:name="' in line:
      # To reduce noise from node tags changing, use android:name as the
      # basis of the hash since names are usually unique.
      basis = [line]
      break
    if '>' in line:
      seen_close = True
  else:
    assert False, 'Did not find end of node:\n' + '\n'.join(lines)

  # Insecure and truncated hash as it only needs to be unique vs. its neighbors.
  digest = hashlib.md5('\n'.join(basis).encode('utf8'))
  return digest.hexdigest()[:8]
209
210
def _IsSelfClosing(lines):
  """Given pretty-printed xml, returns whether first node is self-closing.

  Only the first line containing '>' is inspected: the node is considered
  self-closing when the character just before that '>' is '/'.
  """
  for line in lines:
    close_pos = line.find('>')
    if close_pos == -1:
      continue
    return line[close_pos - 1] == '/'
  assert False, 'Did not find end of tag:\n' + '\n'.join(lines)
218
219
def _AddDiffTags(lines):
  """Appends '  # DIFF-ANCHOR: <hash>' to node start/end lines, in place.

  When multiple identical tags appear sequentially, XML diffs can look like:
    +  </tag>
    +  <tag>
  rather than:
    +  <tag>
    +  </tag>
  Tagging the opening and closing line of a node with the same hash reduces
  that confusion. It also makes changed tags show up with their outer <tag>
  elements rather than showing only the changed attributes.

  Args:
    lines: List of pretty-printed XML lines; modified in place.
  """
  open_hashes = []
  for index, line in enumerate(lines):
    content = line.lstrip()
    # Ignore non-indented tags and lines that are not the start/end of a node.
    if line[0] != ' ' or content[0] != '<':
      continue
    # Ignore self-closing nodes that fit on one line.
    if line.endswith('/>'):
      continue
    # Ignore <application> since its hash changes with basically any change.
    if content.lstrip('</').startswith('application'):
      continue

    if content[1] == '/':
      # Closing tag (</foo>): reuse the hash of the matching opening tag.
      anchor = open_hashes.pop()
    else:
      remaining = lines[index:]
      anchor = _CreateNodeHash(remaining)
      # A multi-line self-closing node has no closing tag to pair with.
      if not _IsSelfClosing(remaining):
        open_hashes.append(anchor)
    lines[index] += '  # DIFF-ANCHOR: {}'.format(anchor)
  assert not open_hashes, 'hash_stack was not empty:\n' + '\n'.join(open_hashes)
252
253
def NormalizeManifest(manifest_contents):
  """Returns a canonical, diff-friendly rendering of an Android manifest.

  Normalization steps:
    * XML comments are dropped (they do not survive ElementTree parsing).
    * android:debuggable is removed from <application>.
    * android:version of <uses-static-library> / <static-library> nodes that
      also have android:name is replaced with '$VERSION_NUMBER'.
    * Occurrences of the manifest's package name within attribute values of
      non-root nodes are replaced with '$PACKAGE'. This step is skipped when
      the manifest does not declare a package.
    * Attributes and nodes are sorted via _SortAndStripElementTree().
    * Output is re-indented, long tags listed in _WRAP_CANDIDATES are wrapped
      to one attribute per line, and DIFF-ANCHOR tags are appended.

  Args:
    manifest_contents: XML text of the manifest.

  Returns:
    The normalized manifest as a string terminated by a newline.
  """
  _RegisterElementTreeNamespaces()
  # Parsing with ElementTree also strips comments.
  root = ElementTree.fromstring(manifest_contents)
  package = GetPackage(root)

  app_node = root.find('application')
  if app_node is not None:
    # android:debuggable is added when !is_official_build. Strip it out to avoid
    # expectation diffs caused by not adding is_official_build. Play store
    # blocks uploading apps with it set, so there's no risk of it slipping in.
    debuggable_name = '{%s}debuggable' % ANDROID_NAMESPACE
    if debuggable_name in app_node.attrib:
      del app_node.attrib[debuggable_name]

    # Trichrome's static library version number is updated daily. To avoid
    # frequent manifest check failures, we remove the exact version number
    # during normalization.
    version_name = '{%s}version' % ANDROID_NAMESPACE
    library_name = '{%s}name' % ANDROID_NAMESPACE
    for node in app_node:
      if (node.tag in ('uses-static-library', 'static-library')
          and version_name in node.keys() and library_name in node.keys()):
        node.set(version_name, '$VERSION_NUMBER')

  # We also remove the exact package name (except the one at the root level)
  # to avoid noise during manifest comparison.
  def blur_package_name(node):
    for key in node.keys():
      node.set(key, node.get(key).replace(package, '$PACKAGE'))

    for child in node:
      blur_package_name(child)

  # We only blur the package names of non-root nodes because they generate a lot
  # of diffs when doing manifest checks for upstream targets. We still want to
  # have 1 piece of package name not blurred just in case the package name is
  # mistakenly changed.
  # Without a package there is nothing to blur: str.replace() rejects None, and
  # an empty string would insert '$PACKAGE' between every character.
  if package:
    for child in root:
      blur_package_name(child)

  _SortAndStripElementTree(root)

  # Fix up whitespace/indentation.
  dom = minidom.parseString(ElementTree.tostring(root))
  out_lines = []
  for l in dom.toprettyxml(indent='  ').splitlines():
    if not l or l.isspace():
      continue
    if len(l) > _WRAP_LINE_LENGTH and any(x in l for x in _WRAP_CANDIDATES):
      # Re-flow the tag as '<tag' followed by one attribute per line.
      indent = ' ' * l.find('<')
      start_tag, attrs, end_tag = _SplitElement(l)
      out_lines.append('{}{}'.format(indent, start_tag))
      for attribute in attrs:
        out_lines.append('{}    {}'.format(indent, attribute))
      out_lines[-1] += '>'
      # Heuristic: Do not allow multi-line tags to be self-closing since these
      # can generally be allowed to have nested elements. When diffing, it adds
      # noise if the base file is self-closing and the non-base file is not
      # self-closing.
      if end_tag == '/>':
        out_lines.append('{}{}>'.format(indent, start_tag.replace('<', '</')))
    else:
      out_lines.append(l)

  # Make output more diff-friendly.
  _AddDiffTags(out_lines)

  return '\n'.join(out_lines) + '\n'
322