1 /*
2  * Copyright 2017, Leanplum, Inc. All rights reserved.
3  *
4  * Licensed to the Apache Software Foundation (ASF) under one
5  * or more contributor license agreements.  See the NOTICE file
6  * distributed with this work for additional information
7  * regarding copyright ownership.  The ASF licenses this file
8  * to you under the Apache License, Version 2.0 (the
9  * "License"); you may not use this file except in compliance
10  * with the License.  You may obtain a copy of the License at
11  *
12  *        http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing,
15  * software distributed under the License is distributed on an
16  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  * KIND, either express or implied.  See the License for the
18  * specific language governing permissions and limitations
19  * under the License.
20  */
21 
22 package com.leanplum.internal;
23 
/**
 * Decodes the 'compressed' binary form of AndroidManifest.xml (as stored inside an .apk) into a
 * plain-text, XML-like string. Based on
 * http://stackoverflow.com/questions/2097813/how-to-parse-the-androidmanifest-xml-file-inside-an-apk-package
 *
 * @author Anna Orlova
 */
class LeanplumManifestParser {
  // XML tags and attributes:
  // Every XML start and end tag consists of 6 32 bit words:
  //   0th word: 02011000 for START_TAG and 03011000 for END_TAG
  //   1st word: a flag?, like 38000000
  //   2nd word: Line of where this tag appeared in the original source file
  //   3rd word: FFFFFFFF ??
  //   4th word: StringIndex of NameSpace name, or FFFFFFFF for default NS
  //   5th word: StringIndex of Element Name
  //   (Note: 01011000 in 0th word means end of XML document, END_DOC_TAG).

  // Start tags (not end tags) contain 3 more words:
  //   6th word: 14001400 meaning??
  //   7th word: Number of Attributes that follow this tag (they follow the 8th word)
  //   8th word: 00000000 meaning??

  // Attributes consist of 5 words:
  //   0th word: StringIndex of Attribute Name's Namespace, or FFFFFFFF
  //   1st word: StringIndex of Attribute Name
  //   2nd word: StringIndex of Attribute Value, or FFFFFFFF if ResourceId used
  //   3rd word: Flags?
  //   4th word: str ind of attr value again, or ResourceId of value.
  // END_DOC_TAG = 0x00100101;
  private static final int START_TAG = 0x00100102;
  private static final int END_TAG = 0x00100103;
  private static final String SPACES = "                                             ";

  /** Size in bytes of one 32 bit word of the binary format. */
  private static final int WORD = 4;
  /** Offset of the StringIndexTable, which directly follows the 9-word file header. */
  private static final int STRING_INDEX_TABLE_OFFSET = 0x24;
  /** Number of words occupied by a START_TAG (6 common words + 3 start-only words). */
  private static final int START_TAG_WORDS = 9;
  /** Number of words occupied by an END_TAG. */
  private static final int END_TAG_WORDS = 6;
  /** Number of words occupied by one attribute. */
  private static final int ATTRIBUTE_WORDS = 5;

  /**
   * Parse the 'compressed' binary form of Android XML docs such as for AndroidManifest.xml in .apk
   * files.
   *
   * <p>Tags are concatenated without line breaks; each tag is preceded by two spaces per nesting
   * level (capped at the length of {@code SPACES}). Attributes whose value is not a string are
   * rendered as {@code resourceID 0x<hex>}. Parsing stops at the first word that is neither a
   * START_TAG nor an END_TAG.
   *
   * @param xml byte array of AndroidManifest.xml.
   * @return String with data of AndroidManifest.xml.
   * @throws NullPointerException if {@code xml} is null.
   * @throws ArrayIndexOutOfBoundsException if {@code xml} is truncated or its offsets point outside
   * the array; no bounds checking is performed while reading.
   */
  static String decompressXml(byte[] xml) {
    // Accumulate in a builder: the output grows with every tag, so repeated String
    // concatenation would copy the whole document on each iteration.
    StringBuilder out = new StringBuilder();
    // Compressed XML file/bytes starts with 0x24 bytes of data,
    // 9 32 bit words in little endian order (LSB first):
    //   0th word is 03 00 08 00
    //   3rd word SEEMS TO BE:  Offset at the end of StringTable
    //   4th word is: Number of strings in string table
    // WARNING: Sometime I indiscriminately display or refer to word in
    //   little endian storage format, or in integer format (ie MSB first).
    int numbStrings = littleEndianValue(xml, 4 * WORD);
    // StringIndexTable starts at offset 0x24, an array of 32 bit LE offsets
    // of the length/string data in the StringTable.
    int sitOff = STRING_INDEX_TABLE_OFFSET;
    // StringTable, each string is represented with a 16 bit little endian
    // character count, followed by that number of 16 bit (LE) (Unicode) chars.
    int stOff = sitOff + numbStrings * WORD;  // StringTable follows StrIndexTable.
    // Step through the XML tree element tags and attributes.
    int off = scanForFirstStartTag(xml);
    int indent = 0;

    while (off < xml.length) {
      int tag0 = littleEndianValue(xml, off);
      int nameSi = littleEndianValue(xml, off + 5 * WORD);
      if (tag0 == START_TAG) {
        int numbAttrs = littleEndianValue(xml, off + 7 * WORD);  // Number of Attributes to follow.
        off += START_TAG_WORDS * WORD;  // Skip over 6+3 words of START_TAG data.
        String name = compXmlString(xml, sitOff, stOff, nameSi);
        out.append(indentation(indent)).append('<').append(name);
        // Look for the Attributes.
        for (int ii = 0; ii < numbAttrs; ii++) {
          int attrNameSi = littleEndianValue(xml, off + WORD);  // AttrName String Index.
          int attrValueSi = littleEndianValue(xml, off + 2 * WORD); // AttrValue Str Ind, or -1.
          int attrResId = littleEndianValue(xml, off + 4 * WORD);  // AttrValue ResourceId or dup.
          off += ATTRIBUTE_WORDS * WORD;  // Skip over the 5 words of an attribute.
          String attrName = compXmlString(xml, sitOff, stOff, attrNameSi);
          String attrValue = attrValueSi != -1
              ? compXmlString(xml, sitOff, stOff, attrValueSi)
              : "resourceID 0x" + Integer.toHexString(attrResId);
          out.append(' ').append(attrName).append("=\"").append(attrValue).append('"');
        }
        out.append('>');
        indent++;
      } else if (tag0 == END_TAG) {
        // A surplus END_TAG must not drive the depth below zero: a negative depth would make
        // the indentation lookup throw although the tag itself is perfectly readable.
        if (indent > 0) {
          indent--;
        }
        off += END_TAG_WORDS * WORD;  // Skip over 6 words of END_TAG data.
        String name = compXmlString(xml, sitOff, stOff, nameSi);
        out.append(indentation(indent)).append("</").append(name).append('>');
      } else {
        break;
      }
    }
    return out.toString();
  }

  /**
   * @return Return the run of spaces used to indent a tag at nesting level depth: two spaces per
   * level, never negative and never longer than SPACES.
   */
  private static String indentation(int depth) {
    int width = Math.max(0, Math.min(depth * 2, SPACES.length()));
    return SPACES.substring(0, width);
  }

  /**
   * @return Return the string with index strInd, looked up through the StringIndexTable at sitOff
   * into the StringTable at stOff, or null when strInd is negative (e.g. FFFFFFFF, "no string").
   */
  private static String compXmlString(byte[] xml, int sitOff, int stOff, int strInd) {
    if (strInd < 0) {
      return null;
    }
    int strOff = stOff + littleEndianValue(xml, sitOff + strInd * WORD);
    return compXmlStringAt(xml, strOff);
  }

  /**
   * @return Return the string stored in StringTable format at offset strOff.  This offset points to
   * the 16 bit string length, which is followed by that number of 16 bit (Unicode) chars.
   */
  private static String compXmlStringAt(byte[] arr, int strOff) {
    int strLen = (arr[strOff + 1] & 0xff) << 8 | (arr[strOff] & 0xff);
    char[] chars = new char[strLen];
    for (int ii = 0; ii < strLen; ii++) {
      int charOff = strOff + 2 + ii * 2;
      // Rebuild the full 16 bit little endian code unit. Keeping only the low byte and decoding
      // it with the platform charset would corrupt every character above U+007F.
      chars[ii] = (char) ((arr[charOff + 1] & 0xff) << 8 | (arr[charOff] & 0xff));
    }
    return new String(chars);
  }

  /**
   * @return Return value of a Little Endian 32 bit word from the byte array at offset off.
   */
  private static int littleEndianValue(byte[] arr, int off) {
    // Mask each byte before shifting so that sign extension cannot leak into higher bits.
    return (arr[off + 3] & 0xff) << 24
        | (arr[off + 2] & 0xff) << 16
        | (arr[off + 1] & 0xff) << 8
        | (arr[off] & 0xff);
  }

  /**
   * @return Return the offset of the first START_TAG word found when scanning forward, one word
   * at a time, from the offset stored in the 3rd header word; if no START_TAG is found, that
   * starting offset itself is returned.
   */
  private static int scanForFirstStartTag(byte[] xml) {
    // XMLTags, The XML tag tree starts after some unknown content after the
    // StringTable.  There is some unknown data after the StringTable, scan forward
    // from this point to the flag for the start of an XML start tag.
    int xmlTagOff = littleEndianValue(xml, 3 * WORD);  // Start from the offset in the 3rd word.
    // Scan forward until we find the bytes: 0x02011000(x00100102 in normal int).
    for (int ii = xmlTagOff; ii < xml.length - 4; ii += WORD) {
      if (littleEndianValue(xml, ii) == START_TAG) {
        return ii;
      }
    }
    return xmlTagOff;
  }
}
161