1 /*
2  * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  *   - Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer.
10  *
11  *   - Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *   - Neither the name of Oracle nor the names of its
16  *     contributors may be used to endorse or promote products derived
17  *     from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <assert.h>
33 #include <string.h>
34 #include <stdlib.h>
35 
36 #include "endian.hpp"
37 #include "imageDecompressor.hpp"
38 #include "imageFile.hpp"
39 #include "inttypes.hpp"
40 #include "jni.h"
41 #include "osSupport.hpp"
42 
43 // Map the full jimage, only with 64 bit addressing.
44 bool ImageFileReader::memory_map_image = sizeof(void *) == 8;
45 
46 #ifdef WIN32
47 const char FileSeparator = '\\';
48 #else
49 const char FileSeparator = '/';
50 #endif
51 
52 // Image files are an alternate file format for storing classes and resources. The
53 // goal is to supply file access which is faster and smaller than the jar format.
54 //
// (More detailed notes in the header.)
56 //
57 
58 // Compute the Perfect Hashing hash code for the supplied UTF-8 string.
hash_code(const char * string,s4 seed)59 s4 ImageStrings::hash_code(const char* string, s4 seed) {
60     assert(seed > 0 && "invariant");
61     // Access bytes as unsigned.
62     u1* bytes = (u1*)string;
63     u4 useed = (u4)seed;
64     // Compute hash code.
65     for (u1 byte = *bytes++; byte; byte = *bytes++) {
66         useed = (useed * HASH_MULTIPLIER) ^ byte;
67     }
68     // Ensure the result is not signed.
69     return (s4)(useed & 0x7FFFFFFF);
70 }
71 
72 // Match up a string in a perfect hash table.
73 // Returns the index where the name should be.
74 // Result still needs validation for precise match (false positive.)
find(Endian * endian,const char * name,s4 * redirect,u4 length)75 s4 ImageStrings::find(Endian* endian, const char* name, s4* redirect, u4 length) {
76     // If the table is empty, then short cut.
77     if (!redirect || !length) {
78         return NOT_FOUND;
79     }
80     // Compute the basic perfect hash for name.
81     s4 hash_code = ImageStrings::hash_code(name);
82     // Modulo table size.
83     s4 index = hash_code % length;
84     // Get redirect entry.
85     //   value == 0 then not found
86     //   value < 0 then -1 - value is true index
87     //   value > 0 then value is seed for recomputing hash.
88     s4 value = endian->get(redirect[index]);
89     // if recompute is required.
90     if (value > 0 ) {
91         // Entry collision value, need to recompute hash.
92         hash_code = ImageStrings::hash_code(name, value);
93         // Modulo table size.
94         return hash_code % length;
95     } else if (value < 0) {
96         // Compute direct index.
97         return -1 - value;
98     }
99     // No entry found.
100     return NOT_FOUND;
101 }
102 
103 // Test to see if UTF-8 string begins with the start UTF-8 string.  If so,
104 // return non-NULL address of remaining portion of string.  Otherwise, return
105 // NULL.    Used to test sections of a path without copying from image string
106 // table.
starts_with(const char * string,const char * start)107 const char* ImageStrings::starts_with(const char* string, const char* start) {
108     char ch1, ch2;
109     // Match up the strings the best we can.
110     while ((ch1 = *string) && (ch2 = *start)) {
111         if (ch1 != ch2) {
112             // Mismatch, return NULL.
113             return NULL;
114         }
115         // Next characters.
116         string++, start++;
117     }
118     // Return remainder of string.
119     return string;
120 }
121 
122 // Inflates the attribute stream into individual values stored in the long
123 // array _attributes. This allows an attribute value to be quickly accessed by
124 // direct indexing.  Unspecified values default to zero (from constructor.)
set_data(u1 * data)125 void ImageLocation::set_data(u1* data) {
126     // Deflate the attribute stream into an array of attributes.
127     u1 byte;
128     // Repeat until end header is found.
129     while ((data != NULL) && (byte = *data)) {
130         // Extract kind from header byte.
131         u1 kind = attribute_kind(byte);
132         assert(kind < ATTRIBUTE_COUNT && "invalid image location attribute");
133         // Extract length of data (in bytes).
134         u1 n = attribute_length(byte);
135         // Read value (most significant first.)
136         _attributes[kind] = attribute_value(data + 1, n);
137         // Position to next attribute by skipping attribute header and data bytes.
138         data += n + 1;
139     }
140 }
141 
142 // Zero all attribute values.
clear_data()143 void ImageLocation::clear_data() {
144     // Set defaults to zero.
145     memset(_attributes, 0, sizeof(_attributes));
146 }
147 
148 // ImageModuleData constructor maps out sub-tables for faster access.
ImageModuleData(const ImageFileReader * image_file)149 ImageModuleData::ImageModuleData(const ImageFileReader* image_file) :
150         _image_file(image_file),
151         _endian(image_file->endian()) {
152 }
153 
154 // Release module data resource.
~ImageModuleData()155 ImageModuleData::~ImageModuleData() {
156 }
157 
158 
159 // Return the module in which a package resides.    Returns NULL if not found.
package_to_module(const char * package_name)160 const char* ImageModuleData::package_to_module(const char* package_name) {
161     // replace all '/' by '.'
162     char* replaced = new char[(int) strlen(package_name) + 1];
163     assert(replaced != NULL && "allocation failed");
164     int i;
165     for (i = 0; package_name[i] != '\0'; i++) {
166       replaced[i] = package_name[i] == '/' ? '.' : package_name[i];
167     }
168     replaced[i] = '\0';
169 
170     // build path /packages/<package_name>
171     const char* radical = "/packages/";
172     char* path = new char[(int) strlen(radical) + (int) strlen(package_name) + 1];
173     assert(path != NULL && "allocation failed");
174     strcpy(path, radical);
175     strcat(path, replaced);
176     delete[] replaced;
177 
178     // retrieve package location
179     ImageLocation location;
180     bool found = _image_file->find_location(path, location);
181     delete[] path;
182     if (!found) {
183         return NULL;
184     }
185 
186     // retrieve offsets to module name
187     int size = (int)location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
188     u1* content = new u1[size];
189     assert(content != NULL && "allocation failed");
190     _image_file->get_resource(location, content);
191     u1* ptr = content;
192     // sequence of sizeof(8) isEmpty|offset. Use the first module that is not empty.
193     u4 offset = 0;
194     for (i = 0; i < size; i+=8) {
195         u4 isEmpty = _endian->get(*((u4*)ptr));
196         ptr += 4;
197         if (!isEmpty) {
198             offset = _endian->get(*((u4*)ptr));
199             break;
200         }
201         ptr += 4;
202     }
203     delete[] content;
204     return _image_file->get_strings().get(offset);
205 }
206 
207 // Manage a table of open image files.  This table allows multiple access points
208 // to share an open image.
ImageFileReaderTable()209 ImageFileReaderTable::ImageFileReaderTable() : _count(0), _max(_growth) {
210     _table = static_cast<ImageFileReader**>(calloc(_max, sizeof(ImageFileReader*)));
211     assert(_table != NULL && "allocation failed");
212 }
213 
~ImageFileReaderTable()214 ImageFileReaderTable::~ImageFileReaderTable() {
215     for (u4 i = 0; i < _count; i++) {
216         ImageFileReader* image = _table[i];
217 
218         if (image != NULL) {
219             delete image;
220         }
221     }
222     free(_table);
223 }
224 
225 // Add a new image entry to the table.
add(ImageFileReader * image)226 void ImageFileReaderTable::add(ImageFileReader* image) {
227     if (_count == _max) {
228         _max += _growth;
229         _table = static_cast<ImageFileReader**>(realloc(_table, _max * sizeof(ImageFileReader*)));
230     }
231     _table[_count++] = image;
232 }
233 
234 // Remove an image entry from the table.
remove(ImageFileReader * image)235 void ImageFileReaderTable::remove(ImageFileReader* image) {
236     for (u4 i = 0; i < _count; i++) {
237         if (_table[i] == image) {
238             // Swap the last element into the found slot
239             _table[i] = _table[--_count];
240             break;
241         }
242     }
243 
244     if (_count != 0 && _count == _max - _growth) {
245         _max -= _growth;
246         _table = static_cast<ImageFileReader**>(realloc(_table, _max * sizeof(ImageFileReader*)));
247     }
248 }
249 
250 // Determine if image entry is in table.
contains(ImageFileReader * image)251 bool ImageFileReaderTable::contains(ImageFileReader* image) {
252     for (u4 i = 0; i < _count; i++) {
253         if (_table[i] == image) {
254             return true;
255         }
256     }
257     return false;
258 }
259 
260 // Table to manage multiple opens of an image file.
261 ImageFileReaderTable ImageFileReader::_reader_table;
262 
263 SimpleCriticalSection _reader_table_lock;
264 
265 // Locate an image if file already open.
find_image(const char * name)266 ImageFileReader* ImageFileReader::find_image(const char* name) {
267     // Lock out _reader_table.
268     SimpleCriticalSectionLock cs(&_reader_table_lock);
269     // Search for an exist image file.
270     for (u4 i = 0; i < _reader_table.count(); i++) {
271         // Retrieve table entry.
272         ImageFileReader* reader = _reader_table.get(i);
273         // If name matches, then reuse (bump up use count.)
274         assert(reader->name() != NULL && "reader->name must not be null");
275         if (strcmp(reader->name(), name) == 0) {
276             reader->inc_use();
277             return reader;
278         }
279     }
280 
281     return NULL;
282 }
283 
284 // Open an image file, reuse structure if file already open.
open(const char * name,bool big_endian)285 ImageFileReader* ImageFileReader::open(const char* name, bool big_endian) {
286     ImageFileReader* reader = find_image(name);
287     if (reader != NULL) {
288         return reader;
289     }
290 
291     // Need a new image reader.
292     reader = new ImageFileReader(name, big_endian);
293     if (reader == NULL || !reader->open()) {
294         // Failed to open.
295         delete reader;
296         return NULL;
297     }
298 
299     // Lock to update
300     SimpleCriticalSectionLock cs(&_reader_table_lock);
301     // Search for an existing image file.
302     for (u4 i = 0; i < _reader_table.count(); i++) {
303         // Retrieve table entry.
304         ImageFileReader* existing_reader = _reader_table.get(i);
305         // If name matches, then reuse (bump up use count.)
306         assert(reader->name() != NULL && "reader->name still must not be null");
307         if (strcmp(existing_reader->name(), name) == 0) {
308             existing_reader->inc_use();
309             reader->close();
310             delete reader;
311             return existing_reader;
312         }
313     }
314     // Bump use count and add to table.
315     reader->inc_use();
316     _reader_table.add(reader);
317     return reader;
318 }
319 
320 // Close an image file if the file is not in use elsewhere.
close(ImageFileReader * reader)321 void ImageFileReader::close(ImageFileReader *reader) {
322     // Lock out _reader_table.
323     SimpleCriticalSectionLock cs(&_reader_table_lock);
324     // If last use then remove from table and then close.
325     if (reader->dec_use()) {
326         _reader_table.remove(reader);
327         delete reader;
328     }
329 }
330 
331 // Return an id for the specifed ImageFileReader.
reader_to_ID(ImageFileReader * reader)332 u8 ImageFileReader::reader_to_ID(ImageFileReader *reader) {
333     // ID is just the cloaked reader address.
334     return (u8)reader;
335 }
336 
337 // Validate the image id.
id_check(u8 id)338 bool ImageFileReader::id_check(u8 id) {
339     // Make sure the ID is a managed (_reader_table) reader.
340     SimpleCriticalSectionLock cs(&_reader_table_lock);
341     return _reader_table.contains((ImageFileReader*)id);
342 }
343 
344 // Return an id for the specifed ImageFileReader.
id_to_reader(u8 id)345 ImageFileReader* ImageFileReader::id_to_reader(u8 id) {
346     assert(id_check(id) && "invalid image id");
347     return (ImageFileReader*)id;
348 }
349 
350 // Constructor intializes to a closed state.
ImageFileReader(const char * name,bool big_endian)351 ImageFileReader::ImageFileReader(const char* name, bool big_endian) :
352     _module_data(NULL) {
353     // Copy the image file name.
354      int len = (int) strlen(name) + 1;
355     _name = new char[len];
356     assert(_name != NULL  && "allocation failed");
357     strncpy(_name, name, len);
358     // Initialize for a closed file.
359     _fd = -1;
360     _endian = Endian::get_handler(big_endian);
361     _index_data = NULL;
362 }
363 
364 // Close image and free up data structures.
~ImageFileReader()365 ImageFileReader::~ImageFileReader() {
366     // Ensure file is closed.
367     close();
368     // Free up name.
369     if (_name) {
370         delete[] _name;
371         _name = NULL;
372     }
373 
374     if (_module_data != NULL) {
375         delete _module_data;
376     }
377 }
378 
379 // Open image file for read access.
open()380 bool ImageFileReader::open() {
381     // If file exists open for reading.
382     _fd = osSupport::openReadOnly(_name);
383     if (_fd == -1) {
384         return false;
385     }
386     // Retrieve the file size.
387     _file_size = osSupport::size(_name);
388     // Read image file header and verify it has a valid header.
389     size_t header_size = sizeof(ImageHeader);
390     if (_file_size < header_size ||
391         !read_at((u1*)&_header, header_size, 0) ||
392         _header.magic(_endian) != IMAGE_MAGIC ||
393         _header.major_version(_endian) != MAJOR_VERSION ||
394         _header.minor_version(_endian) != MINOR_VERSION) {
395         close();
396         return false;
397     }
398     // Size of image index.
399     _index_size = index_size();
400     // Make sure file is large enough to contain the index.
401     if (_file_size < _index_size) {
402         return false;
403     }
404     // Memory map image (minimally the index.)
405     _index_data = (u1*)osSupport::map_memory(_fd, _name, 0, (size_t)map_size());
406     assert(_index_data && "image file not memory mapped");
407     // Retrieve length of index perfect hash table.
408     u4 length = table_length();
409     // Compute offset of the perfect hash table redirect table.
410     u4 redirect_table_offset = (u4)header_size;
411     // Compute offset of index attribute offsets.
412     u4 offsets_table_offset = redirect_table_offset + length * (u4)sizeof(s4);
413     // Compute offset of index location attribute data.
414     u4 location_bytes_offset = offsets_table_offset + length * (u4)sizeof(u4);
415     // Compute offset of index string table.
416     u4 string_bytes_offset = location_bytes_offset + locations_size();
417     // Compute address of the perfect hash table redirect table.
418     _redirect_table = (s4*)(_index_data + redirect_table_offset);
419     // Compute address of index attribute offsets.
420     _offsets_table = (u4*)(_index_data + offsets_table_offset);
421     // Compute address of index location attribute data.
422     _location_bytes = _index_data + location_bytes_offset;
423     // Compute address of index string table.
424     _string_bytes = _index_data + string_bytes_offset;
425 
426     // Initialize the module data
427     _module_data = new ImageModuleData(this);
428     // Successful open (if memory allocation succeeded).
429     return _module_data != NULL;
430 }
431 
432 // Close image file.
close()433 void ImageFileReader::close() {
434     // Deallocate the index.
435     if (_index_data) {
436         osSupport::unmap_memory((char*)_index_data, (size_t)map_size());
437         _index_data = NULL;
438     }
439     // Close file.
440     if (_fd != -1) {
441         osSupport::close(_fd);
442         _fd = -1;
443     }
444 
445     if (_module_data != NULL) {
446         delete _module_data;
447         _module_data = NULL;
448     }
449 }
450 
451 // Read directly from the file.
read_at(u1 * data,u8 size,u8 offset) const452 bool ImageFileReader::read_at(u1* data, u8 size, u8 offset) const {
453     return (u8)osSupport::read(_fd, (char*)data, size, offset) == size;
454 }
455 
456 // Find the location attributes associated with the path.    Returns true if
457 // the location is found, false otherwise.
find_location(const char * path,ImageLocation & location) const458 bool ImageFileReader::find_location(const char* path, ImageLocation& location) const {
459     // Locate the entry in the index perfect hash table.
460     s4 index = ImageStrings::find(_endian, path, _redirect_table, table_length());
461     // If is found.
462     if (index != ImageStrings::NOT_FOUND) {
463         // Get address of first byte of location attribute stream.
464         u1* data = get_location_data(index);
465         // Expand location attributes.
466         location.set_data(data);
467         // Make sure result is not a false positive.
468         return verify_location(location, path);
469     }
470     return false;
471 }
472 
473 // Find the location index and size associated with the path.
474 // Returns the location index and size if the location is found, 0 otherwise.
find_location_index(const char * path,u8 * size) const475 u4 ImageFileReader::find_location_index(const char* path, u8 *size) const {
476     // Locate the entry in the index perfect hash table.
477     s4 index = ImageStrings::find(_endian, path, _redirect_table, table_length());
478     // If found.
479     if (index != ImageStrings::NOT_FOUND) {
480         // Get address of first byte of location attribute stream.
481         u4 offset = get_location_offset(index);
482         u1* data = get_location_offset_data(offset);
483         // Expand location attributes.
484         ImageLocation location(data);
485         // Make sure result is not a false positive.
486         if (verify_location(location, path)) {
487                 *size = (jlong)location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
488                 return offset;
489         }
490     }
491     return 0;            // not found
492 }
493 
494 // Verify that a found location matches the supplied path (without copying.)
verify_location(ImageLocation & location,const char * path) const495 bool ImageFileReader::verify_location(ImageLocation& location, const char* path) const {
496     // Manage the image string table.
497     ImageStrings strings(_string_bytes, _header.strings_size(_endian));
498     // Position to first character of the path string.
499     const char* next = path;
500     // Get module name string.
501     const char* module = location.get_attribute(ImageLocation::ATTRIBUTE_MODULE, strings);
502     // If module string is not empty.
503     if (*module != '\0') {
504         // Compare '/module/' .
505         if (*next++ != '/') return false;
506         if (!(next = ImageStrings::starts_with(next, module))) return false;
507         if (*next++ != '/') return false;
508     }
509     // Get parent (package) string
510     const char* parent = location.get_attribute(ImageLocation::ATTRIBUTE_PARENT, strings);
511     // If parent string is not empty string.
512     if (*parent != '\0') {
513         // Compare 'parent/' .
514         if (!(next = ImageStrings::starts_with(next, parent))) return false;
515         if (*next++ != '/') return false;
516     }
517     // Get base name string.
518     const char* base = location.get_attribute(ImageLocation::ATTRIBUTE_BASE, strings);
519     // Compare with basne name.
520     if (!(next = ImageStrings::starts_with(next, base))) return false;
521     // Get extension string.
522     const char* extension = location.get_attribute(ImageLocation::ATTRIBUTE_EXTENSION, strings);
523     // If extension is not empty.
524     if (*extension != '\0') {
525         // Compare '.extension' .
526         if (*next++ != '.') return false;
527         if (!(next = ImageStrings::starts_with(next, extension))) return false;
528     }
529     // True only if complete match and no more characters.
530     return *next == '\0';
531 }
532 
533 // Return the resource for the supplied location offset.
get_resource(u4 offset,u1 * uncompressed_data) const534 void ImageFileReader::get_resource(u4 offset, u1* uncompressed_data) const {
535         // Get address of first byte of location attribute stream.
536         u1* data = get_location_offset_data(offset);
537         // Expand location attributes.
538         ImageLocation location(data);
539         // Read the data
540         get_resource(location, uncompressed_data);
541 }
542 
543 // Return the resource for the supplied location.
get_resource(ImageLocation & location,u1 * uncompressed_data) const544 void ImageFileReader::get_resource(ImageLocation& location, u1* uncompressed_data) const {
545     // Retrieve the byte offset and size of the resource.
546     u8 offset = location.get_attribute(ImageLocation::ATTRIBUTE_OFFSET);
547     u8 uncompressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
548     u8 compressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_COMPRESSED);
549     // If the resource is compressed.
550     if (compressed_size != 0) {
551         u1* compressed_data;
552         // If not memory mapped read in bytes.
553         if (!memory_map_image) {
554             // Allocate buffer for compression.
555             compressed_data = new u1[(size_t)compressed_size];
556             assert(compressed_data != NULL && "allocation failed");
557             // Read bytes from offset beyond the image index.
558             bool is_read = read_at(compressed_data, compressed_size, _index_size + offset);
559             assert(is_read && "error reading from image or short read");
560         } else {
561             compressed_data = get_data_address() + offset;
562         }
563         // Get image string table.
564         const ImageStrings strings = get_strings();
565         // Decompress resource.
566         ImageDecompressor::decompress_resource(compressed_data, uncompressed_data, uncompressed_size,
567                         &strings, _endian);
568         // If not memory mapped then release temporary buffer.
569         if (!memory_map_image) {
570                 delete[] compressed_data;
571         }
572     } else {
573         // Read bytes from offset beyond the image index.
574         bool is_read = read_at(uncompressed_data, uncompressed_size, _index_size + offset);
575         assert(is_read && "error reading from image or short read");
576     }
577 }
578 
579 // Return the ImageModuleData for this image
get_image_module_data()580 ImageModuleData * ImageFileReader::get_image_module_data() {
581     return _module_data;
582 }
583