# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Contains common helpers for working with Android manifests."""

import hashlib
import os
import re
import shlex
import sys
import xml.dom.minidom as minidom

from util import build_utils
from xml.etree import ElementTree

ANDROID_NAMESPACE = 'http://schemas.android.com/apk/res/android'
TOOLS_NAMESPACE = 'http://schemas.android.com/tools'
DIST_NAMESPACE = 'http://schemas.android.com/apk/distribution'
EMPTY_ANDROID_MANIFEST_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'AndroidManifest.xml'))
# When normalizing for expectation matching, wrap these tags when they are long
# or else they become very hard to read.
_WRAP_CANDIDATES = (
    '<manifest',
    '<application',
    '<activity',
    '<provider',
    '<receiver',
    '<service',
)
# Don't wrap lines shorter than this.
_WRAP_LINE_LENGTH = 100

_xml_namespace_initialized = False


def _RegisterElementTreeNamespaces():
  """Registers the android/tools/dist namespace prefixes with ElementTree.

  Guarded by a module-level flag so the registration only runs once.
  """
  global _xml_namespace_initialized
  if _xml_namespace_initialized:
    return
  _xml_namespace_initialized = True
  ElementTree.register_namespace('android', ANDROID_NAMESPACE)
  ElementTree.register_namespace('tools', TOOLS_NAMESPACE)
  ElementTree.register_namespace('dist', DIST_NAMESPACE)


def ParseManifest(path):
  """Parses an AndroidManifest.xml using ElementTree.

  Registers the 'android', 'tools' and 'dist' namespace prefixes and creates
  the <application> node under <manifest> if it is missing.

  Args:
    path: Path of the manifest file to parse.

  Returns tuple of:
    doc: Root xml document.
    manifest_node: the <manifest> node.
    app_node: the <application> node.
  """
  _RegisterElementTreeNamespaces()
  doc = ElementTree.parse(path)
  # ElementTree.find does not work if the required tag is the root.
  if doc.getroot().tag == 'manifest':
    manifest_node = doc.getroot()
  else:
    manifest_node = doc.find('manifest')

  app_node = doc.find('application')
  if app_node is None:
    app_node = ElementTree.SubElement(manifest_node, 'application')

  return doc, manifest_node, app_node


def SaveManifest(doc, path):
  """Serializes |doc|'s root element as UTF-8 and writes it to |path|.

  The write goes through build_utils.AtomicOutput.
  """
  with build_utils.AtomicOutput(path) as f:
    f.write(ElementTree.tostring(doc.getroot(), encoding='UTF-8'))


def GetPackage(manifest_node):
  """Returns the 'package' attribute of |manifest_node|, or None if unset."""
  return manifest_node.get('package')


def AssertUsesSdk(manifest_node,
                  min_sdk_version=None,
                  target_sdk_version=None,
                  max_sdk_version=None,
                  fail_if_not_exist=False):
  """Asserts values of attributes of <uses-sdk> element.

  Unless |fail_if_not_exist| is true, will only assert if both the passed value
  is not None and the value of attribute exist. If |fail_if_not_exist| is true
  will fail if passed value is not None but attribute does not exist.

  Note: when the manifest has no <uses-sdk> element at all, this returns
  without asserting anything, regardless of |fail_if_not_exist|.

  Raises:
    AssertionError: if an attribute does not match the expected value, or is
        missing while |fail_if_not_exist| is true and a value is expected.
  """
  uses_sdk_node = manifest_node.find('./uses-sdk')
  if uses_sdk_node is None:
    return
  for prefix, sdk_version in (('min', min_sdk_version),
                              ('target', target_sdk_version),
                              ('max', max_sdk_version)):
    value = uses_sdk_node.get('{%s}%sSdkVersion' % (ANDROID_NAMESPACE, prefix))
    if fail_if_not_exist and not value and sdk_version:
      assert False, (
          '%sSdkVersion in Android manifest does not exist but we expect %s' %
          (prefix, sdk_version))
    if not value or not sdk_version:
      continue
    assert value == sdk_version, (
        '%sSdkVersion in Android manifest is %s but we expect %s' %
        (prefix, value, sdk_version))


def AssertPackage(manifest_node, package):
  """Asserts that manifest package has desired value.

  Will only assert if both |package| is not None and the package is set in the
  manifest.

  Raises:
    AssertionError: if both values are set and they differ.
  """
  package_value = GetPackage(manifest_node)
  if package_value is None or package is None:
    return
  assert package_value == package, (
      'Package in Android manifest is %s but we expect %s' % (package_value,
                                                              package))


def _SortAndStripElementTree(root):
  """Recursively sorts attributes and children of |root| in place.

  Also clears whitespace-only text of every descendant element.
  """

  # Sort alphabetically with two exceptions:
  # 1) Put <application> node last (since it's giant).
  # 2) Put android:name before other attributes.
  def element_sort_key(node):
    if node.tag == 'application':
      return 'z'
    ret = ElementTree.tostring(node)
    # ElementTree.tostring inserts namespace attributes for any that are needed
    # for the node or any of its descendants. Remove them so as to prevent a
    # change to a child that adds/removes a namespace usage from changing sort
    # order.
    return re.sub(r' xmlns:.*?".*?"', '', ret.decode('utf8'))

  name_attr = '{%s}name' % ANDROID_NAMESPACE

  def attribute_sort_key(tup):
    return ('', '') if tup[0] == name_attr else tup

  def helper(node):
    for child in node:
      if child.text and child.text.isspace():
        child.text = None
      helper(child)

    # Sort attributes (requires Python 3.8+).
    node.attrib = dict(sorted(node.attrib.items(), key=attribute_sort_key))

    # Sort nodes
    node[:] = sorted(node, key=element_sort_key)

  helper(root)


def _SplitElement(line):
  """Parses a one-line xml node into ('<tag', ['a="b"', ...], '/>').

  Args:
    line: A single line holding one opening (or self-closing) tag. Must end
        with '>'.

  Returns:
    Tuple of (start tag, list of attribute strings, end tag), where the end
    tag is either '>' or '/>'.
  """

  # Shlex splits nicely, but removes quotes. Need to put them back.
  def restore_quotes(value):
    return value.replace('=', '="', 1) + '"'

  # Simplify restore_quotes by separating />.
  assert line.endswith('>'), line
  end_tag = '>'
  if line.endswith('/>'):
    end_tag = '/>'
  line = line[:-len(end_tag)]

  # Use shlex to avoid having to re-encode &quot;, etc.
  parts = shlex.split(line)
  start_tag = parts[0]
  attrs = parts[1:]

  return start_tag, [restore_quotes(x) for x in attrs], end_tag


def _CreateNodeHash(lines):
  """Computes a hash (md5) for the first XML node found in |lines|.

  Args:
    lines: List of strings containing pretty-printed XML.

  Returns:
    The first 8 hex characters of the md5 digest, as a string. The digest is
    taken over the node's lines, or over the first line containing
    android:name=" when one is seen before the opening tag is closed.
  """
  target_indent = lines[0].find('<')
  tag_closed = False
  for i, l in enumerate(lines[1:]):
    cur_indent = l.find('<')
    if cur_indent != -1 and cur_indent <= target_indent:
      # |l| is the first tag at the same or a shallower indent; everything
      # before it belongs to the node being hashed.
      tag_lines = lines[:i + 1]
      break
    elif not tag_closed and 'android:name="' in l:
      # To reduce noise of node tags changing, use android:name as the
      # basis of the hash since they are usually unique.
      tag_lines = [l]
      break
    tag_closed = tag_closed or '>' in l
  else:
    assert False, 'Did not find end of node:\n' + '\n'.join(lines)

  # Insecure and truncated hash as it only needs to be unique vs. its neighbors.
  return hashlib.md5(('\n'.join(tag_lines)).encode('utf8')).hexdigest()[:8]


def _IsSelfClosing(lines):
  """Given pretty-printed xml, returns whether first node is self-closing."""
  for l in lines:
    idx = l.find('>')
    if idx != -1:
      return l[idx - 1] == '/'
  assert False, 'Did not find end of tag:\n' + '\n'.join(lines)


def _AddDiffTags(lines):
  """Appends a DIFF-ANCHOR hash comment to node start/end lines, in place.

  Args:
    lines: List of non-empty pretty-printed XML lines. Modified in place.
  """
  # When multiple identical tags appear sequentially, XML diffs can look like:
  # +   </tag>
  # +   <tag>
  # rather than:
  # +   <tag>
  # +   </tag>
  # To reduce confusion, add hashes to tags.
  # This also ensures changed tags show up with outer <tag> elements rather than
  # showing only changed attributes.
  hash_stack = []
  for i, l in enumerate(lines):
    stripped = l.lstrip()
    # Ignore non-indented tags and lines that are not the start/end of a node.
    if l[0] != ' ' or stripped[0] != '<':
      continue
    # Ignore self-closing nodes that fit on one line.
    if l[-2:] == '/>':
      continue
    # Ignore <application> since diff tag changes with basically any change.
    if stripped.lstrip('</').startswith('application'):
      continue

    # Check for the closing tag (</foo>).
    if stripped[1] != '/':
      cur_hash = _CreateNodeHash(lines[i:])
      if not _IsSelfClosing(lines[i:]):
        hash_stack.append(cur_hash)
    else:
      # Closing tags reuse the hash of their matching opening tag.
      cur_hash = hash_stack.pop()
    lines[i] += ' # DIFF-ANCHOR: {}'.format(cur_hash)
  assert not hash_stack, 'hash_stack was not empty:\n' + '\n'.join(hash_stack)


def NormalizeManifest(manifest_contents):
  """Returns a normalized, diff-friendly rendering of a manifest.

  Strips android:debuggable from <application>, replaces static library
  versions with '$VERSION_NUMBER', replaces the manifest's package name with
  '$PACKAGE' in attributes of non-root nodes, sorts attributes and nodes,
  wraps long tags and appends DIFF-ANCHOR comments.

  Args:
    manifest_contents: The manifest XML, as accepted by
        ElementTree.fromstring.

  Returns:
    The normalized manifest as a string ending with a newline.
  """
  _RegisterElementTreeNamespaces()
  # This also strips comments and sorts node attributes alphabetically.
  root = ElementTree.fromstring(manifest_contents)
  package = GetPackage(root)

  app_node = root.find('application')
  if app_node is not None:
    # android:debuggable is added when !is_official_build. Strip it out to avoid
    # expectation diffs caused by not adding is_official_build. Play store
    # blocks uploading apps with it set, so there's no risk of it slipping in.
    debuggable_name = '{%s}debuggable' % ANDROID_NAMESPACE
    if debuggable_name in app_node.attrib:
      del app_node.attrib[debuggable_name]

    # Trichrome's static library version number is updated daily. To avoid
    # frequent manifest check failures, we remove the exact version number
    # during normalization.
    for node in app_node:
      if (node.tag in ['uses-static-library', 'static-library']
          and '{%s}version' % ANDROID_NAMESPACE in node.keys()
          and '{%s}name' % ANDROID_NAMESPACE in node.keys()):
        node.set('{%s}version' % ANDROID_NAMESPACE, '$VERSION_NUMBER')

  # We also remove the exact package name (except the one at the root level)
  # to avoid noise during manifest comparison.
  def blur_package_name(node):
    for key in node.keys():
      node.set(key, node.get(key).replace(package, '$PACKAGE'))

    for child in node:
      blur_package_name(child)

  # We only blur the package names of non-root nodes because they generate a lot
  # of diffs when doing manifest checks for upstream targets. We still want to
  # have 1 piece of package name not blurred just in case the package name is
  # mistakenly changed.
  # Skip blurring when the manifest declares no package: str.replace() raises
  # TypeError for a None pattern, and an empty pattern would insert '$PACKAGE'
  # between every character of every attribute value.
  if package:
    for child in root:
      blur_package_name(child)

  _SortAndStripElementTree(root)

  # Fix up whitespace/indentation.
  dom = minidom.parseString(ElementTree.tostring(root))
  out_lines = []
  for l in dom.toprettyxml(indent=' ').splitlines():
    if not l or l.isspace():
      continue
    if len(l) > _WRAP_LINE_LENGTH and any(x in l for x in _WRAP_CANDIDATES):
      indent = ' ' * l.find('<')
      start_tag, attrs, end_tag = _SplitElement(l)
      out_lines.append('{}{}'.format(indent, start_tag))
      for attribute in attrs:
        out_lines.append('{} {}'.format(indent, attribute))
      out_lines[-1] += '>'
      # Heuristic: Do not allow multi-line tags to be self-closing since these
      # can generally be allowed to have nested elements. When diffing, it adds
      # noise if the base file is self-closing and the non-base file is not
      # self-closing.
      if end_tag == '/>':
        out_lines.append('{}{}>'.format(indent, start_tag.replace('<', '</')))
    else:
      out_lines.append(l)

  # Make output more diff-friendly.
  _AddDiffTags(out_lines)

  return '\n'.join(out_lines) + '\n'