1 /*
2  * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  *   - Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer.
10  *
11  *   - Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *   - Neither the name of Oracle nor the names of its
16  *     contributors may be used to endorse or promote products derived
17  *     from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "jni.h"
33 #include "imageDecompressor.hpp"
34 #include "endian.hpp"
35 #ifdef WIN32
36 #include <windows.h>
37 #else
38 #include <dlfcn.h>
39 #endif
40 
// Signature of the inflate routine exported by the zip shared library. The
// address is looked up at run time by ImageDecompressor::image_decompressor_init()
// under the symbol name "ZIP_InflateFully".
typedef jboolean (*ZipInflateFully_t)(void *inBuf, jlong inLen,
                                      void *outBuf, jlong outLen, char **pmsg);
// Cached address of that routine; stays NULL until image_decompressor_init()
// has resolved it (or if the lookup failed).
static ZipInflateFully_t ZipInflateFully        = NULL;
44 
// File-name pieces for shared libraries on non-Windows platforms. findEntry()
// concatenates them around "zip" to form the name handed to dlopen():
// "libzip.dylib" when __APPLE__ is defined, "libzip.so" otherwise.
#ifndef WIN32
    #define JNI_LIB_PREFIX "lib"
    #ifdef __APPLE__
        #define JNI_LIB_SUFFIX ".dylib"
    #else
        #define JNI_LIB_SUFFIX ".so"
    #endif
#endif
53 
54 /**
55  * Return the address of the entry point named in the zip shared library.
56  * @param name - the name of the entry point
57  * @return the address of the entry point or NULL
58  */
findEntry(const char * name)59 static void* findEntry(const char* name) {
60     void *addr = NULL;
61 #ifdef WIN32
62     HMODULE handle = GetModuleHandle("zip.dll");
63     if (handle == NULL) {
64       handle = LoadLibrary("zip.dll");
65     }
66     if (handle == NULL) {
67       return NULL;
68     }
69     addr = (void*) GetProcAddress(handle, name);
70     return addr;
71 #else
72     addr = dlopen(JNI_LIB_PREFIX "zip" JNI_LIB_SUFFIX, RTLD_GLOBAL|RTLD_LAZY);
73     if (addr == NULL) {
74         return NULL;
75     }
76     addr = dlsym(addr, name);
77     return addr;
78 #endif
79 }
80 
81 /*
82  * Initialize the array of decompressors.
83  */
84 int ImageDecompressor::_decompressors_num = 0;
85 ImageDecompressor** ImageDecompressor::_decompressors = NULL;
image_decompressor_init()86 void ImageDecompressor::image_decompressor_init() {
87     if (_decompressors == NULL) {
88         ZipInflateFully = (ZipInflateFully_t) findEntry("ZIP_InflateFully");
89      assert(ZipInflateFully != NULL && "ZIP decompressor not found.");
90         _decompressors_num = 2;
91         _decompressors = new ImageDecompressor*[_decompressors_num];
92         _decompressors[0] = new ZipDecompressor("zip");
93         _decompressors[1] = new SharedStringDecompressor("compact-cp");
94     }
95 }
96 
image_decompressor_close()97 void ImageDecompressor::image_decompressor_close() {
98     delete[] _decompressors;
99 }
100 
101 /*
102  * Locate decompressor.
103  */
get_decompressor(const char * decompressor_name)104 ImageDecompressor* ImageDecompressor::get_decompressor(const char * decompressor_name) {
105     image_decompressor_init();
106     for (int i = 0; i < _decompressors_num; i++) {
107         ImageDecompressor* decompressor = _decompressors[i];
108         assert(decompressor != NULL && "Decompressors not initialized.");
109         if (strcmp(decompressor->get_name(), decompressor_name) == 0) {
110             return decompressor;
111         }
112     }
113     assert(false && "No decompressor found.");
114     return NULL;
115 }
116 
117 // Sparc to read unaligned content
118 // u8 l = (*(u8*) ptr);
119 // If ptr is not aligned, sparc will fail.
getU8(u1 * ptr,Endian * endian)120 u8 ImageDecompressor::getU8(u1* ptr, Endian *endian) {
121     u8 ret;
122     if (endian->is_big_endian()) {
123         ret = (u8)ptr[0] << 56 | (u8)ptr[1] << 48 | (u8)ptr[2]<<40 | (u8)ptr[3]<<32 |
124                 ptr[4]<<24 | ptr[5]<<16 | ptr[6]<<8 | ptr[7];
125     } else {
126         ret = ptr[0] | ptr[1]<<8 | ptr[2]<<16 | ptr[3]<<24 | (u8)ptr[4]<<32 |
127                 (u8)ptr[5]<<40 | (u8)ptr[6]<<48 | (u8)ptr[7]<<56;
128     }
129     return ret;
130 }
131 
getU4(u1 * ptr,Endian * endian)132 u4 ImageDecompressor::getU4(u1* ptr, Endian *endian) {
133     u4 ret;
134     if (endian->is_big_endian()) {
135         ret = ptr[0] << 24 | ptr[1]<<16 | (ptr[2]<<8) | ptr[3];
136     } else {
137         ret = ptr[0] | ptr[1]<<8 | (ptr[2]<<16) | ptr[3]<<24;
138     }
139     return ret;
140 }
141 
142 /*
143  * Decompression entry point. Called from ImageFileReader::get_resource.
144  */
decompress_resource(u1 * compressed,u1 * uncompressed,u8 uncompressed_size,const ImageStrings * strings,Endian * endian)145 void ImageDecompressor::decompress_resource(u1* compressed, u1* uncompressed,
146                 u8 uncompressed_size, const ImageStrings* strings, Endian *endian) {
147     bool has_header = false;
148     u1* decompressed_resource = compressed;
149     u1* compressed_resource = compressed;
150     // Resource could have been transformed by a stack of decompressors.
151     // Iterate and decompress resources until there is no more header.
152     do {
153         ResourceHeader _header;
154         u1* compressed_resource_base = compressed_resource;
155         _header._magic = getU4(compressed_resource, endian);
156         compressed_resource += 4;
157         _header._size = getU8(compressed_resource, endian);
158         compressed_resource += 8;
159         _header._uncompressed_size = getU8(compressed_resource, endian);
160         compressed_resource += 8;
161         _header._decompressor_name_offset = getU4(compressed_resource, endian);
162         compressed_resource += 4;
163         _header._decompressor_config_offset = getU4(compressed_resource, endian);
164         compressed_resource += 4;
165         _header._is_terminal = *compressed_resource;
166         compressed_resource += 1;
167         has_header = _header._magic == ResourceHeader::resource_header_magic;
168         if (has_header) {
169             // decompressed_resource array contains the result of decompression
170             decompressed_resource = new u1[(size_t) _header._uncompressed_size];
171             // Retrieve the decompressor name
172             const char* decompressor_name = strings->get(_header._decompressor_name_offset);
173             assert(decompressor_name && "image decompressor not found");
174             // Retrieve the decompressor instance
175             ImageDecompressor* decompressor = get_decompressor(decompressor_name);
176             assert(decompressor && "image decompressor not found");
177             // Ask the decompressor to decompress the compressed content
178             decompressor->decompress_resource(compressed_resource, decompressed_resource,
179                 &_header, strings);
180             if (compressed_resource_base != compressed) {
181                 delete[] compressed_resource_base;
182             }
183             compressed_resource = decompressed_resource;
184         }
185     } while (has_header);
186     memcpy(uncompressed, decompressed_resource, (size_t) uncompressed_size);
187     delete[] decompressed_resource;
188 }
189 
190 // Zip decompressor
191 
decompress_resource(u1 * data,u1 * uncompressed,ResourceHeader * header,const ImageStrings * strings)192 void ZipDecompressor::decompress_resource(u1* data, u1* uncompressed,
193                 ResourceHeader* header, const ImageStrings* strings) {
194     char* msg = NULL;
195     jboolean res = ZipDecompressor::decompress(data, header->_size, uncompressed,
196                     header->_uncompressed_size, &msg);
197     assert(res && "decompression failed");
198 }
199 
decompress(void * in,u8 inSize,void * out,u8 outSize,char ** pmsg)200 jboolean ZipDecompressor::decompress(void *in, u8 inSize, void *out, u8 outSize, char **pmsg) {
201     return (*ZipInflateFully)(in, inSize, out, outSize, pmsg);
202 }
203 
204 // END Zip Decompressor
205 
206 // Shared String decompressor
207 
// Payload size, in bytes, of each fixed-size constant pool entry.
// array index is the constant pool tag. value is size.
// eg: array[5]  = 8; means size of long is 8 bytes.
// The size does not include the tag byte itself: decompress_resource() copies
// the tag separately and then copies sizes[tag] further bytes.
// NOTE(review): the table has 19 entries (tags 0..18) and decompress_resource()
// indexes it with the raw tag byte without a bounds check; entry 17 is 0.
// Confirm that tags above 18 cannot occur and that 0 is intended for tag 17.
const u1 SharedStringDecompressor::sizes[] = {
    0, 0, 0, 4, 4, 8, 8, 2, 2, 4, 4, 4, 4, 0, 0, 3, 2, 0, 4
};
213 /**
214  * Recreate the class by reconstructing the constant pool.
215  */
decompress_resource(u1 * data,u1 * uncompressed_resource,ResourceHeader * header,const ImageStrings * strings)216 void SharedStringDecompressor::decompress_resource(u1* data,
217                 u1* uncompressed_resource,
218                 ResourceHeader* header, const ImageStrings* strings) {
219     u1* uncompressed_base = uncompressed_resource;
220     u1* data_base = data;
221     int header_size = 8; // magic + major + minor
222     memcpy(uncompressed_resource, data, header_size + 2); //+ cp count
223     uncompressed_resource += header_size + 2;
224     data += header_size;
225     u2 cp_count = Endian::get_java(data);
226     data += 2;
227     for (int i = 1; i < cp_count; i++) {
228         u1 tag = *data;
229         data += 1;
230         switch (tag) {
231 
232             case externalized_string:
233             { // String in Strings table
234                 *uncompressed_resource = 1;
235                 uncompressed_resource += 1;
236                 int k = decompress_int(data);
237                 const char * string = strings->get(k);
238                 int str_length = (int) strlen(string);
239                 Endian::set_java(uncompressed_resource, str_length);
240                 uncompressed_resource += 2;
241                 memcpy(uncompressed_resource, string, str_length);
242                 uncompressed_resource += str_length;
243                 break;
244             }
245             // Descriptor String has been split and types added to Strings table
246             case externalized_string_descriptor:
247             {
248                 *uncompressed_resource = 1;
249                 uncompressed_resource += 1;
250                 int descriptor_index = decompress_int(data);
251                 int indexes_length = decompress_int(data);
252                 u1* length_address = uncompressed_resource;
253                 uncompressed_resource += 2;
254                 int desc_length = 0;
255                 const char * desc_string = strings->get(descriptor_index);
256                 if (indexes_length > 0) {
257                     u1* indexes_base = data;
258                     data += indexes_length;
259                     char c = *desc_string;
260                     do {
261                         *uncompressed_resource = c;
262                         uncompressed_resource++;
263                         desc_length += 1;
264                         /*
265                          * Every L character is the marker we are looking at in order
266                          * to reconstruct the descriptor. Each time an L is found, then
267                          * we retrieve the couple token/token at the current index and
268                          * add it to the descriptor.
269                          * "(L;I)V" and "java/lang","String" couple of tokens,
270                          * this becomes "(Ljava/lang/String;I)V"
271                          */
272                         if (c == 'L') {
273                             int index = decompress_int(indexes_base);
274                             const char * pkg = strings->get(index);
275                             int str_length = (int) strlen(pkg);
276                             // the case where we have a package.
277                             // reconstruct the type full name
278                             if (str_length > 0) {
279                                 int len = str_length + 1;
280                                 char* fullpkg = new char[len];
281                                 char* pkg_base = fullpkg;
282                                 memcpy(fullpkg, pkg, str_length);
283                                 fullpkg += str_length;
284                                 *fullpkg = '/';
285                                 memcpy(uncompressed_resource, pkg_base, len);
286                                 uncompressed_resource += len;
287                                 delete[] pkg_base;
288                                 desc_length += len;
289                             } else { // Empty package
290                                 // Nothing to do.
291                             }
292                             int classIndex = decompress_int(indexes_base);
293                             const char * clazz = strings->get(classIndex);
294                             int clazz_length = (int) strlen(clazz);
295                             memcpy(uncompressed_resource, clazz, clazz_length);
296                             uncompressed_resource += clazz_length;
297                             desc_length += clazz_length;
298                         }
299                         desc_string += 1;
300                         c = *desc_string;
301                     } while (c != '\0');
302                 } else {
303                         desc_length = (int) strlen(desc_string);
304                         memcpy(uncompressed_resource, desc_string, desc_length);
305                         uncompressed_resource += desc_length;
306                 }
307                 Endian::set_java(length_address, desc_length);
308                 break;
309             }
310 
311             case constant_utf8:
312             { // UTF-8
313                 *uncompressed_resource = tag;
314                 uncompressed_resource += 1;
315                 u2 str_length = Endian::get_java(data);
316                 int len = str_length + 2;
317                 memcpy(uncompressed_resource, data, len);
318                 uncompressed_resource += len;
319                 data += len;
320                 break;
321             }
322 
323             case constant_long:
324             case constant_double:
325             {
326                 i++;
327             }
328             /* fall through */
329             default:
330             {
331                 *uncompressed_resource = tag;
332                 uncompressed_resource += 1;
333                 int size = sizes[tag];
334                 memcpy(uncompressed_resource, data, size);
335                 uncompressed_resource += size;
336                 data += size;
337             }
338         }
339     }
340     u8 remain = header->_size - (int)(data - data_base);
341     u8 computed = (u8)(uncompressed_resource - uncompressed_base) + remain;
342     if (header->_uncompressed_size != computed)
343         printf("Failure, expecting %llu but getting %llu\n", header->_uncompressed_size,
344                 computed);
345     assert(header->_uncompressed_size == computed &&
346                 "Constant Pool reconstruction failed");
347     memcpy(uncompressed_resource, data, (size_t) remain);
348 }
349 
350 /*
351  * Decompress integers. Compressed integers are negative.
352  * If positive, the integer is not decompressed.
353  * If negative, length extracted from the first byte, then reconstruct the integer
354  * from the following bytes.
355  * Example of compression: 1 is compressed on 1 byte: 10100001
356  */
decompress_int(unsigned char * & value)357 int SharedStringDecompressor::decompress_int(unsigned char*& value) {
358     int len = 4;
359     int res = 0;
360     char b1 = *value;
361     if (is_compressed((signed char)b1)) { // compressed
362         len = get_compressed_length(b1);
363         char clearedValue = b1 &= 0x1F;
364         if (len == 1) {
365             res = clearedValue;
366         } else {
367             res = (clearedValue & 0xFF) << 8 * (len - 1);
368             for (int i = 1; i < len; i++) {
369                 res |= (value[i]&0xFF) << 8 * (len - i - 1);
370             }
371         }
372     } else {
373         res = (value[0] & 0xFF) << 24 | (value[1]&0xFF) << 16 |
374                     (value[2]&0xFF) << 8 | (value[3]&0xFF);
375     }
376     value += len;
377     return res;
378 }
379 // END Shared String decompressor
380