1 /* 2 * Copyright 2017, Leanplum, Inc. All rights reserved. 3 * 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 */ 21 22 package com.leanplum.internal; 23 24 /** 25 * LeanplumManifestParser class for get AndroidManifest.xml. http://stackoverflow.com/questions/2097813/how-to-parse-the-androidmanifest-xml-file-inside-an-apk-package 26 * 27 * @author Anna Orlova 28 */ 29 class LeanplumManifestParser { 30 // XML tags and attributes: 31 // Every XML start and end tag consists of 6 32 bit words: 32 // 0th word: 02011000 for START_TAG and 03011000 for END_TAG 33 // 1st word: a flag?, like 38000000 34 // 2nd word: Line of where this tag appeared in the original source file 35 // 3rd word: FFFFFFFF ?? 36 // 4th word: StringIndex of NameSpace name, or FFFFFFFF for default NS 37 // 5th word: StringIndex of Element Name 38 // (Note: 01011000 in 0th word means end of XML document, END_DOC_TAG). 39 40 // Start tags (not end tags) contain 3 more words: 41 // 6th word: 14001400 meaning?? 42 // 7th word: Number of Attributes that follow this tag(follow word 8th) 43 // 8th word: 00000000 meaning?? 44 45 // Attributes consist of 5 words: 46 // 0th word: StringIndex of Attribute Name's Namespace, or FFFFFFFF 47 // 1st word: StringIndex of Attribute Name 48 // 2nd word: StringIndex of Attribute Value, or FFFFFFF if ResourceId used 49 // 3rd word: Flags? 50 // 4th word: str ind of attr value again, or ResourceId of value. 51 // END_DOC_TAG = 0x00100101; 52 private static final int START_TAG = 0x00100102; 53 private static final int END_TAG = 0x00100103; 54 private static final String SPACES = " "; 55 56 /** 57 * Parse the 'compressed' binary form of Android XML docs such as for AndroidManifest.xml in .apk 58 * files. 59 * 60 * @param xml byte array of AndroidManifest.xml. 61 * @return String with data of AndroidManifest.xml. 62 */ decompressXml(byte[] xml)63 static String decompressXml(byte[] xml) { 64 String out = ""; 65 // Compressed XML file/bytes starts with 24x bytes of data, 66 // 9 32 bit words in little endian order (LSB first): 67 // 0th word is 03 00 08 00 68 // 3rd word SEEMS TO BE: Offset at then of StringTable 69 // 4th word is: Number of strings in string table 70 // WARNING: Sometime I indiscriminately display or refer to word in 71 // little endian storage format, or in integer format (ie MSB first). 72 int numbStrings = littleEndianValue(xml, 4 * 4); 73 // StringIndexTable starts at offset 24x, an array of 32 bit LE offsets 74 // of the length/string data in the StringTable. 75 int sitOff = 0x24; // Offset of start of StringIndexTable. 76 // StringTable, each string is represented with a 16 bit little endian 77 // character count, followed by that number of 16 bit (LE) (Unicode) chars. 78 int stOff = sitOff + numbStrings * 4; // StringTable follows StrIndexTable. 79 // Step through the XML tree element tags and attributes. 80 int off = scanForFirstStartTag(xml); 81 int indent = 0; 82 83 while (off < xml.length) { 84 int tag0 = littleEndianValue(xml, off); 85 int nameSi = littleEndianValue(xml, off + 5 * 4); 86 if (tag0 == START_TAG) { 87 int numbAttrs = littleEndianValue(xml, off + 7 * 4); // Number of Attributes to follow. 88 off += 9 * 4; // Skip over 6+3 words of START_TAG data 89 String name = compXmlString(xml, sitOff, stOff, nameSi); 90 // Look for the Attributes 91 StringBuilder sb = new StringBuilder(); 92 for (int ii = 0; ii < numbAttrs; ii++) { 93 int attrNameSi = littleEndianValue(xml, off + 4); // AttrName String Index. 94 int attrValueSi = littleEndianValue(xml, off + 2 * 4); // AttrValue Str Ind, or FFFFFFFF. 95 int attrResId = littleEndianValue(xml, off + 4 * 4); // AttrValue ResourceId or dup. 96 // AttrValue StrInd. 97 off += 5 * 4; // Skip over the 5 words of an attribute. 98 String attrName = compXmlString(xml, sitOff, stOff, attrNameSi); 99 String attrValue = attrValueSi != -1 100 ? compXmlString(xml, sitOff, stOff, attrValueSi) 101 : "resourceID 0x" + Integer.toHexString(attrResId); 102 sb.append(" ").append(attrName).append("=\"").append(attrValue).append("\""); 103 } 104 out += SPACES.substring(0, Math.min(indent * 2, SPACES.length())) + "<" + name + sb + ">"; 105 indent++; 106 } else if (tag0 == END_TAG) { 107 indent--; 108 off += 6 * 4; // Skip over 6 words of END_TAG data 109 String name = compXmlString(xml, sitOff, stOff, nameSi); 110 out += SPACES.substring(0, Math.min(indent * 2, SPACES.length())) + "</" + name + ">"; 111 112 } else { 113 break; 114 } 115 } 116 return out; 117 } 118 compXmlString(byte[] xml, int sitOff, int stOff, int strInd)119 private static String compXmlString(byte[] xml, int sitOff, int stOff, int strInd) { 120 if (strInd < 0) return null; 121 int strOff = stOff + littleEndianValue(xml, sitOff + strInd * 4); 122 return compXmlStringAt(xml, strOff); 123 } 124 125 /** 126 * @return Return the string stored in StringTable format at offset strOff. This offset points to 127 * the 16 bit string length, which is followed by that number of 16 bit (Unicode) chars. 128 */ compXmlStringAt(byte[] arr, int strOff)129 private static String compXmlStringAt(byte[] arr, int strOff) { 130 int strLen = arr[strOff + 1] << 8 & 0xff00 | arr[strOff] & 0xff; 131 byte[] chars = new byte[strLen]; 132 for (int ii = 0; ii < strLen; ii++) { 133 chars[ii] = arr[strOff + 2 + ii * 2]; 134 } 135 return new String(chars); // Hack, just use 8 byte chars. 136 } 137 138 /** 139 * @return Return value of a Little Endian 32 bit word from the byte array at offset off. 140 */ littleEndianValue(byte[] arr, int off)141 private static int littleEndianValue(byte[] arr, int off) { 142 return arr[off + 3] << 24 & 0xff000000 | arr[off + 2] << 16 & 0xff0000 143 | arr[off + 1] << 8 & 0xff00 | arr[off] & 0xFF; 144 } 145 scanForFirstStartTag(byte[] xml)146 private static int scanForFirstStartTag(byte[] xml) { 147 // XMLTags, The XML tag tree starts after some unknown content after the 148 // StringTable. There is some unknown data after the StringTable, scan forward 149 // from this point to the flag for the start of an XML start tag. 150 int xmlTagOff = littleEndianValue(xml, 3 * 4); // Start from the offset in the 3rd word. 151 // Scan forward until we find the bytes: 0x02011000(x00100102 in normal int). 152 for (int ii = xmlTagOff; ii < xml.length - 4; ii += 4) { 153 if (littleEndianValue(xml, ii) == START_TAG) { 154 xmlTagOff = ii; 155 break; 156 } 157 } 158 return xmlTagOff; 159 } 160 } 161