/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Oracle nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef LIBJIMAGE_IMAGEFILE_HPP
#define LIBJIMAGE_IMAGEFILE_HPP

#include <assert.h>

#include "endian.hpp"
#include "inttypes.hpp"

// Image files are an alternate file format for storing classes and resources. The
// goal is to supply file access which is faster and smaller than the jar format.
// It should be noted that unlike jars, information stored in an image is in native
// endian format. This allows the image to be mapped into memory without endian
// translation.  This also means that images are platform dependent.
//
// Image files are structured as three sections;
//
//         +-----------+
//         |  Header   |
//         +-----------+
//         |           |
//         |   Index   |
//         |           |
//         +-----------+
//         |           |
//         |           |
//         | Resources |
//         |           |
//         |           |
//         +-----------+
//
// The header contains information related to identification and description of
// contents.
//
//         +-------------------------+
//         |   Magic (0xCAFEDADA)    |
//         +------------+------------+
//         | Major Vers | Minor Vers |
//         +------------+------------+
//         |          Flags          |
//         +-------------------------+
//         |      Resource Count     |
//         +-------------------------+
//         |       Table Length      |
//         +-------------------------+
//         |      Attributes Size    |
//         +-------------------------+
//         |       Strings Size      |
//         +-------------------------+
//
// Magic - means of identifying validity of the file.  This avoids requiring a
//         special file extension.
// Major vers, minor vers - differences in version numbers indicate structural
//                          changes in the image.
// Flags - various image wide flags (future).
// Resource count - number of resources in the file.
// Table length - the length of lookup tables used in the index.
// Attributes size - number of bytes in the region used to store location attribute
//                   streams.
// Strings size - the size of the region used to store strings used by the
//                index and meta data.
//
// The index contains information related to resource lookup. The algorithm
// used for lookup is "A Practical Minimal Perfect Hashing Method"
// (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string
// in the form /<module>/<package>/<base>.<extension>  return the resource location
// information;
//
//     redirectIndex = hash(path, DEFAULT_SEED) % table_length;
//     redirect = redirectTable[redirectIndex];
//     if (redirect == 0) return not found;
//     locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % table_length;
//     location = locationTable[locationIndex];
//     if (!verify(location, path)) return not found;
//     return location;
//
// Note: The hash function takes an initial seed value.  A different seed value
// usually returns a different result for strings that would otherwise collide with
// other seeds. The verify function guarantees the found resource location is
// indeed the resource we are looking for.
//
// The following is the format of the index;
//
//         +-------------------+
//         |   Redirect Table  |
//         +-------------------+
//         | Attribute Offsets |
//         +-------------------+
//         |   Attribute Data  |
//         +-------------------+
//         |      Strings      |
//         +-------------------+
//
// Redirect Table - Array of 32-bit signed values representing actions that
//                  should take place for hashed strings that map to that
//                  value.  Negative values indicate no hash collision and can be
//                  quickly converted to indices into attribute offsets.  Positive
//                  values represent a new seed for hashing an index into attribute
//                  offsets.  Zero indicates not found.
// Attribute Offsets - Array of 32-bit unsigned values representing offsets into
//                     attribute data.  Attribute offsets can be iterated to do a
//                     full survey of resources in the image.  Offset of zero
//                     indicates no attributes.
// Attribute Data - Bytes representing compact attribute data for locations. (See
//                  comments in ImageLocation.)
// Strings - Collection of zero terminated UTF-8 strings used by the index and
//           image meta data.
Each string is accessed by offset. Each string is 138 // unique. Offset zero is reserved for the empty string. 139 // 140 // Note that the memory mapped index assumes 32 bit alignment of each component 141 // in the index. 142 // 143 // Endianness of an image. 144 // An image booted by hotspot is always in native endian. However, it is possible 145 // to read (by the JDK) in alternate endian format. Primarily, this is during 146 // cross platform scenarios. Ex, where javac needs to read an embedded image 147 // to access classes for crossing compilation. 148 // 149 150 class ImageFileReader; // forward declaration 151 152 // Manage image file string table. 153 class ImageStrings { 154 private: 155 u1* _data; // Data bytes for strings. 156 u4 _size; // Number of bytes in the string table. 157 public: 158 enum { 159 // Not found result from find routine. 160 NOT_FOUND = -1, 161 // Prime used to generate hash for Perfect Hashing. 162 HASH_MULTIPLIER = 0x01000193 163 }; 164 ImageStrings(u1 * data,u4 size)165 ImageStrings(u1* data, u4 size) : _data(data), _size(size) {} 166 167 // Return the UTF-8 string beginning at offset. get(u4 offset) const168 inline const char* get(u4 offset) const { 169 assert(offset < _size && "offset exceeds string table size"); 170 return (const char*)(_data + offset); 171 } 172 173 // Compute the Perfect Hashing hash code for the supplied UTF-8 string. hash_code(const char * string)174 inline static u4 hash_code(const char* string) { 175 return hash_code(string, HASH_MULTIPLIER); 176 } 177 178 // Compute the Perfect Hashing hash code for the supplied string, starting at seed. 179 static s4 hash_code(const char* string, s4 seed); 180 181 // Match up a string in a perfect hash table. Result still needs validation 182 // for precise match. 183 static s4 find(Endian* endian, const char* name, s4* redirect, u4 length); 184 185 // Test to see if UTF-8 string begins with the start UTF-8 string. 
If so, 186 // return non-NULL address of remaining portion of string. Otherwise, return 187 // NULL. Used to test sections of a path without copying from image string 188 // table. 189 static const char* starts_with(const char* string, const char* start); 190 191 // Test to see if UTF-8 string begins with start char. If so, return non-NULL 192 // address of remaining portion of string. Otherwise, return NULL. Used 193 // to test a character of a path without copying. starts_with(const char * string,const char ch)194 inline static const char* starts_with(const char* string, const char ch) { 195 return *string == ch ? string + 1 : NULL; 196 } 197 }; 198 199 // Manage image file location attribute data. Within an image, a location's 200 // attributes are compressed into a stream of bytes. An attribute stream is 201 // composed of individual attribute sequences. Each attribute sequence begins with 202 // a header byte containing the attribute 'kind' (upper 5 bits of header) and the 203 // 'length' less 1 (lower 3 bits of header) of bytes that follow containing the 204 // attribute value. Attribute values present as most significant byte first. 205 // 206 // Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x22 207 // (kind = 4, length = 3), 0x03, 0x35, 0x62. 208 // 209 // An attribute stream is terminated with a header kind of ATTRIBUTE_END (header 210 // byte of zero.) 211 // 212 // ImageLocation inflates the stream into individual values stored in the long 213 // array _attributes. This allows an attribute value can be quickly accessed by 214 // direct indexing. Unspecified values default to zero. 215 // 216 // Notes: 217 // - Even though ATTRIBUTE_END is used to mark the end of the attribute stream, 218 // streams will contain zero byte values to represent lesser significant bits. 219 // Thus, detecting a zero byte is not sufficient to detect the end of an attribute 220 // stream. 
221 // - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region 222 // storing the resources. Thus, in an image this represents the number of bytes 223 // after the index. 224 // - Currently, compressed resources are represented by having a non-zero 225 // ATTRIBUTE_COMPRESSED value. This represents the number of bytes stored in the 226 // image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the 227 // inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value 228 // of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and 229 // in memory. In the future, additional compression techniques will be used and 230 // represented differently. 231 // - Package strings include trailing slash and extensions include prefix period. 232 // 233 class ImageLocation { 234 public: 235 enum { 236 ATTRIBUTE_END, // End of attribute stream marker 237 ATTRIBUTE_MODULE, // String table offset of module name 238 ATTRIBUTE_PARENT, // String table offset of resource path parent 239 ATTRIBUTE_BASE, // String table offset of resource path base 240 ATTRIBUTE_EXTENSION, // String table offset of resource path extension 241 ATTRIBUTE_OFFSET, // Container byte offset of resource 242 ATTRIBUTE_COMPRESSED, // In image byte size of the compressed resource 243 ATTRIBUTE_UNCOMPRESSED, // In memory byte size of the uncompressed resource 244 ATTRIBUTE_COUNT // Number of attribute kinds 245 }; 246 247 private: 248 // Values of inflated attributes. 249 u8 _attributes[ATTRIBUTE_COUNT]; 250 251 // Return the attribute value number of bytes. attribute_length(u1 data)252 inline static u1 attribute_length(u1 data) { 253 return (data & 0x7) + 1; 254 } 255 256 // Return the attribute kind. attribute_kind(u1 data)257 inline static u1 attribute_kind(u1 data) { 258 u1 kind = data >> 3; 259 assert(kind < ATTRIBUTE_COUNT && "invalid attribute kind"); 260 return kind; 261 } 262 263 // Return the attribute length. 
attribute_value(u1 * data,u1 n)264 inline static u8 attribute_value(u1* data, u1 n) { 265 assert(0 < n && n <= 8 && "invalid attribute value length"); 266 u8 value = 0; 267 // Most significant bytes first. 268 for (u1 i = 0; i < n; i++) { 269 value <<= 8; 270 value |= data[i]; 271 } 272 return value; 273 } 274 275 public: ImageLocation()276 ImageLocation() { 277 clear_data(); 278 } 279 ImageLocation(u1 * data)280 ImageLocation(u1* data) { 281 clear_data(); 282 set_data(data); 283 } 284 285 // Inflates the attribute stream into individual values stored in the long 286 // array _attributes. This allows an attribute value to be quickly accessed by 287 // direct indexing. Unspecified values default to zero. 288 void set_data(u1* data); 289 290 // Zero all attribute values. 291 void clear_data(); 292 293 // Retrieve an attribute value from the inflated array. get_attribute(u1 kind) const294 inline u8 get_attribute(u1 kind) const { 295 assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT && "invalid attribute kind"); 296 return _attributes[kind]; 297 } 298 299 // Retrieve an attribute string value from the inflated array. get_attribute(u4 kind,const ImageStrings & strings) const300 inline const char* get_attribute(u4 kind, const ImageStrings& strings) const { 301 return strings.get((u4)get_attribute(kind)); 302 } 303 }; 304 305 // 306 // Manage the image module meta data. 307 class ImageModuleData { 308 const ImageFileReader* _image_file; // Source image file 309 Endian* _endian; // Endian handler 310 311 public: 312 ImageModuleData(const ImageFileReader* image_file); 313 ~ImageModuleData(); 314 315 // Return the module in which a package resides. Returns NULL if not found. 316 const char* package_to_module(const char* package_name); 317 }; 318 319 // Image file header, starting at offset 0. 
320 class ImageHeader { 321 private: 322 u4 _magic; // Image file marker 323 u4 _version; // Image file major version number 324 u4 _flags; // Image file flags 325 u4 _resource_count; // Number of resources in file 326 u4 _table_length; // Number of slots in index tables 327 u4 _locations_size; // Number of bytes in attribute table 328 u4 _strings_size; // Number of bytes in string table 329 330 public: magic() const331 u4 magic() const { return _magic; } magic(Endian * endian) const332 u4 magic(Endian* endian) const { return endian->get(_magic); } set_magic(Endian * endian,u4 magic)333 void set_magic(Endian* endian, u4 magic) { return endian->set(_magic, magic); } 334 major_version(Endian * endian) const335 u4 major_version(Endian* endian) const { return endian->get(_version) >> 16; } minor_version(Endian * endian) const336 u4 minor_version(Endian* endian) const { return endian->get(_version) & 0xFFFF; } set_version(Endian * endian,u4 major_version,u4 minor_version)337 void set_version(Endian* endian, u4 major_version, u4 minor_version) { 338 return endian->set(_version, major_version << 16 | minor_version); 339 } 340 flags(Endian * endian) const341 u4 flags(Endian* endian) const { return endian->get(_flags); } set_flags(Endian * endian,u4 value)342 void set_flags(Endian* endian, u4 value) { return endian->set(_flags, value); } 343 resource_count(Endian * endian) const344 u4 resource_count(Endian* endian) const { return endian->get(_resource_count); } set_resource_count(Endian * endian,u4 count)345 void set_resource_count(Endian* endian, u4 count) { return endian->set(_resource_count, count); } 346 table_length(Endian * endian) const347 u4 table_length(Endian* endian) const { return endian->get(_table_length); } set_table_length(Endian * endian,u4 count)348 void set_table_length(Endian* endian, u4 count) { return endian->set(_table_length, count); } 349 locations_size(Endian * endian) const350 u4 locations_size(Endian* endian) const { return 
endian->get(_locations_size); } set_locations_size(Endian * endian,u4 size)351 void set_locations_size(Endian* endian, u4 size) { return endian->set(_locations_size, size); } 352 strings_size(Endian * endian) const353 u4 strings_size(Endian* endian) const { return endian->get(_strings_size); } set_strings_size(Endian * endian,u4 size)354 void set_strings_size(Endian* endian, u4 size) { return endian->set(_strings_size, size); } 355 }; 356 357 // Max path length limit independent of platform. Windows max path is 1024, 358 // other platforms use 4096. The JCK fails several tests when 1024 is used. 359 #define IMAGE_MAX_PATH 4096 360 361 class ImageFileReader; 362 363 // Manage a table of open image files. This table allows multiple access points 364 // to share an open image. 365 class ImageFileReaderTable { 366 private: 367 const static u4 _growth = 8; // Growth rate of the table 368 u4 _count; // Number of entries in the table 369 u4 _max; // Maximum number of entries allocated 370 ImageFileReader** _table; // Growable array of entries 371 372 public: 373 ImageFileReaderTable(); 374 ~ImageFileReaderTable(); 375 376 // Return the number of entries. count()377 inline u4 count() { return _count; } 378 379 // Return the ith entry from the table. get(u4 i)380 inline ImageFileReader* get(u4 i) { return _table[i]; } 381 382 // Add a new image entry to the table. 383 void add(ImageFileReader* image); 384 385 // Remove an image entry from the table. 386 void remove(ImageFileReader* image); 387 388 // Determine if image entry is in table. 389 bool contains(ImageFileReader* image); 390 }; 391 392 // Manage the image file. 393 // ImageFileReader manages the content of an image file. 394 // Initially, the header of the image file is read for validation. If valid, 395 // values in the header are used calculate the size of the image index. The 396 // index is then memory mapped to allow load on demand and sharing. 
The 397 // -XX:+MemoryMapImage flag determines if the entire file is loaded (server use.) 398 // An image can be used by Hotspot and multiple reference points in the JDK, thus 399 // it is desirable to share a reader. To accomodate sharing, a share table is 400 // defined (see ImageFileReaderTable in imageFile.cpp) To track the number of 401 // uses, ImageFileReader keeps a use count (_use). Use is incremented when 402 // 'opened' by reference point and decremented when 'closed'. Use of zero 403 // leads the ImageFileReader to be actually closed and discarded. 404 class ImageFileReader { 405 friend class ImageFileReaderTable; 406 private: 407 // Manage a number of image files such that an image can be shared across 408 // multiple uses (ex. loader.) 409 static ImageFileReaderTable _reader_table; 410 411 // true if image should be fully memory mapped. 412 static bool memory_map_image; 413 414 char* _name; // Name of image 415 s4 _use; // Use count 416 int _fd; // File descriptor 417 Endian* _endian; // Endian handler 418 u8 _file_size; // File size in bytes 419 ImageHeader _header; // Image header 420 size_t _index_size; // Total size of index 421 u1* _index_data; // Raw index data 422 s4* _redirect_table; // Perfect hash redirect table 423 u4* _offsets_table; // Location offset table 424 u1* _location_bytes; // Location attributes 425 u1* _string_bytes; // String table 426 ImageModuleData *module_data; // The ImageModuleData for this image 427 428 ImageFileReader(const char* name, bool big_endian); 429 ~ImageFileReader(); 430 431 // Compute number of bytes in image file index. index_size()432 inline size_t index_size() { 433 return sizeof(ImageHeader) + 434 table_length() * sizeof(u4) * 2 + locations_size() + strings_size(); 435 } 436 437 public: 438 enum { 439 // Image file marker. 440 IMAGE_MAGIC = 0xCAFEDADA, 441 // Endian inverted Image file marker. 442 IMAGE_MAGIC_INVERT = 0xDADAFECA, 443 // Image file major version number. 
444 MAJOR_VERSION = 1, 445 // Image file minor version number. 446 MINOR_VERSION = 0 447 }; 448 449 // Locate an image if file already open. 450 static ImageFileReader* find_image(const char* name); 451 452 // Open an image file, reuse structure if file already open. 453 static ImageFileReader* open(const char* name, bool big_endian = Endian::is_big_endian()); 454 455 // Close an image file if the file is not in use elsewhere. 456 static void close(ImageFileReader *reader); 457 458 // Return an id for the specifed ImageFileReader. 459 static u8 reader_to_ID(ImageFileReader *reader); 460 461 // Validate the image id. 462 static bool id_check(u8 id); 463 464 // Return an id for the specifed ImageFileReader. 465 static ImageFileReader* id_to_reader(u8 id); 466 467 // Open image file for read access. 468 bool open(); 469 470 // Close image file. 471 void close(); 472 473 // Read directly from the file. 474 bool read_at(u1* data, u8 size, u8 offset) const; 475 endian() const476 inline Endian* endian() const { return _endian; } 477 478 // Retrieve name of image file. name() const479 inline const char* name() const { 480 return _name; 481 } 482 483 // Retrieve size of image file. file_size() const484 inline u8 file_size() const { 485 return _file_size; 486 } 487 488 // Retrieve the size of the mapped image. map_size() const489 inline u8 map_size() const { 490 return (u8)(memory_map_image ? _file_size : _index_size); 491 } 492 493 // Return first address of index data. get_index_address() const494 inline u1* get_index_address() const { 495 return _index_data; 496 } 497 498 // Return first address of resource data. get_data_address() const499 inline u1* get_data_address() const { 500 return _index_data + _index_size; 501 } 502 503 // Get the size of the index data. 
get_index_size() const504 size_t get_index_size() const { 505 return _index_size; 506 } 507 table_length() const508 inline u4 table_length() const { 509 return _header.table_length(_endian); 510 } 511 locations_size() const512 inline u4 locations_size() const { 513 return _header.locations_size(_endian); 514 } 515 strings_size() const516 inline u4 strings_size()const { 517 return _header.strings_size(_endian); 518 } 519 offsets_table() const520 inline u4* offsets_table() const { 521 return _offsets_table; 522 } 523 524 // Increment use count. inc_use()525 inline void inc_use() { 526 _use++; 527 } 528 529 // Decrement use count. dec_use()530 inline bool dec_use() { 531 return --_use == 0; 532 } 533 534 // Return a string table accessor. get_strings() const535 inline const ImageStrings get_strings() const { 536 return ImageStrings(_string_bytes, _header.strings_size(_endian)); 537 } 538 539 // Return location attribute stream at offset. get_location_offset_data(u4 offset) const540 inline u1* get_location_offset_data(u4 offset) const { 541 assert((u4)offset < _header.locations_size(_endian) && 542 "offset exceeds location attributes size"); 543 return offset != 0 ? _location_bytes + offset : NULL; 544 } 545 546 // Return location attribute stream for location i. get_location_data(u4 index) const547 inline u1* get_location_data(u4 index) const { 548 return get_location_offset_data(get_location_offset(index)); 549 } 550 551 // Return the location offset for index. get_location_offset(u4 index) const552 inline u4 get_location_offset(u4 index) const { 553 assert((u4)index < _header.table_length(_endian) && 554 "index exceeds location count"); 555 return _endian->get(_offsets_table[index]); 556 } 557 558 // Find the location attributes associated with the path. Returns true if 559 // the location is found, false otherwise. 560 bool find_location(const char* path, ImageLocation& location) const; 561 562 // Find the location index and size associated with the path. 
563 // Returns the location index and size if the location is found, 564 // ImageFileReader::NOT_FOUND otherwise. 565 u4 find_location_index(const char* path, u8 *size) const; 566 567 // Verify that a found location matches the supplied path. 568 bool verify_location(ImageLocation& location, const char* path) const; 569 570 // Return the resource for the supplied location index. 571 void get_resource(u4 index, u1* uncompressed_data) const; 572 573 // Return the resource for the supplied path. 574 void get_resource(ImageLocation& location, u1* uncompressed_data) const; 575 576 // Return the ImageModuleData for this image 577 ImageModuleData * get_image_module_data(); 578 579 }; 580 #endif // LIBJIMAGE_IMAGEFILE_HPP 581