1 /* 2 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP 26 #define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP 27 28 #include "oops/array.hpp" 29 #include "oops/symbol.hpp" 30 #include "utilities/hashtable.hpp" 31 32 template <class T, class N> class CompactHashtable; 33 class NumberSeq; 34 class SimpleCompactHashtable; 35 class SerializeClosure; 36 37 // Stats for symbol tables in the CDS archive 38 class CompactHashtableStats { 39 public: 40 int hashentry_count; 41 int hashentry_bytes; 42 int bucket_count; 43 int bucket_bytes; 44 }; 45 46 ///////////////////////////////////////////////////////////////////////// 47 // 48 // The compact hash table writer. Used at dump time for writing out 49 // the compact table to the shared archive. 50 // 51 // At dump time, the CompactHashtableWriter obtains all entries from the 52 // symbol/string table and adds them to a new temporary hash table. The hash 53 // table size (number of buckets) is calculated using 54 // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket 55 // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option. 56 // 4 is chosen because it produces smaller sized bucket on average for 57 // faster lookup. It also has relatively small number of empty buckets and 58 // good distribution of the entries. 59 // 60 // We use a simple hash function (hash % num_bucket) for the table. 61 // The new table is compacted when written out. Please see comments 62 // above the CompactHashtable class for the table layout detail. The bucket 63 // offsets are written to the archive as part of the compact table. The 64 // bucket offset is encoded in the low 30-bit (0-29) and the bucket type 65 // (regular or compact) are encoded in bit[31, 30]. For buckets with more 66 // than one entry, both hash and entry offset are written to the 67 // table. For buckets with only one entry, only the entry offset is written 68 // to the table and the buckets are tagged as compact in their type bits. 69 // Buckets without entry are skipped from the table. Their offsets are 70 // still written out for faster lookup. 71 // 72 class CompactHashtableWriter: public StackObj { 73 public: 74 class Entry { 75 unsigned int _hash; 76 u4 _value; 77 78 public: Entry()79 Entry() {} Entry(unsigned int hash,u4 val)80 Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {} 81 value()82 u4 value() { 83 return _value; 84 } hash()85 unsigned int hash() { 86 return _hash; 87 } 88 operator ==(const CompactHashtableWriter::Entry & other)89 bool operator==(const CompactHashtableWriter::Entry& other) { 90 return (_value == other._value && _hash == other._hash); 91 } 92 }; // class CompactHashtableWriter::Entry 93 94 private: 95 int _num_entries; 96 int _num_buckets; 97 int _num_empty_buckets; 98 int _num_value_only_buckets; 99 int _num_other_buckets; 100 GrowableArray<Entry>** _buckets; 101 CompactHashtableStats* _stats; 102 Array<u4>* _compact_buckets; 103 Array<u4>* _compact_entries; 104 105 public: 106 // This is called at dump-time only 107 CompactHashtableWriter(int num_buckets, CompactHashtableStats* stats); 108 ~CompactHashtableWriter(); 109 110 void add(unsigned int hash, u4 value); add(u4 value)111 void add(u4 value) { 112 add((unsigned int)value, value); 113 } 114 115 private: 116 void allocate_table(); 117 void dump_table(NumberSeq* summary); 118 119 public: 120 void dump(SimpleCompactHashtable *cht, const char* table_name); 121 const char* table_name(); 122 }; 123 124 class CompactSymbolTableWriter: public CompactHashtableWriter { 125 public: CompactSymbolTableWriter(int num_buckets,CompactHashtableStats * stats)126 CompactSymbolTableWriter(int num_buckets, CompactHashtableStats* stats) : 127 CompactHashtableWriter(num_buckets, stats) {} 128 void add(unsigned int hash, Symbol *symbol); 129 void dump(CompactHashtable<Symbol*, char> *cht); 130 }; 131 132 class CompactStringTableWriter: public CompactHashtableWriter { 133 public: CompactStringTableWriter(int num_entries,CompactHashtableStats * stats)134 CompactStringTableWriter(int num_entries, CompactHashtableStats* stats) : 135 CompactHashtableWriter(num_entries, stats) {} 136 void add(unsigned int hash, oop string); 137 void dump(CompactHashtable<oop, char> *cht); 138 }; 139 140 #define REGULAR_BUCKET_TYPE 0 141 #define VALUE_ONLY_BUCKET_TYPE 1 142 #define TABLEEND_BUCKET_TYPE 3 143 #define BUCKET_OFFSET_MASK 0x3FFFFFFF 144 #define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK) 145 #define BUCKET_TYPE_SHIFT 30 146 #define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT) 147 #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK)) 148 149 ///////////////////////////////////////////////////////////////////////////// 150 // 151 // CompactHashtable is used to stored the CDS archive's symbol/string table. Used 152 // at runtime only to access the compact table from the archive. 153 // 154 // Because these tables are read-only (no entries can be added/deleted) at run-time 155 // and tend to have large number of entries, we try to minimize the footprint 156 // cost per entry. 157 // 158 // The CompactHashtable is split into two arrays 159 // 160 // u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset 161 // u4 entries[<variable size>] 162 // 163 // The size of buckets[] is 'num_buckets + 1'. Each entry of 164 // buckets[] is a 32-bit encoding of the bucket type and bucket offset, 165 // with the type in the left-most 2-bit and offset in the remaining 30-bit. 166 // The last entry is a special type. It contains the end of the last 167 // bucket. 168 // 169 // There are two types of buckets, regular buckets and value_only buckets. The 170 // value_only buckets have '01' in their highest 2-bit, and regular buckets have 171 // '00' in their highest 2-bit. 172 // 173 // For normal buckets, each entry is 8 bytes in the entries[]: 174 // u4 hash; /* symbol/string hash */ 175 // union { 176 // u4 offset; /* Symbol* sym = (Symbol*)(base_address + offset) */ 177 // narrowOop str; /* String narrowOop encoding */ 178 // } 179 // 180 // 181 // For value_only buckets, each entry has only the 4-byte 'offset' in the entries[]. 182 // 183 // Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code 184 // is skipped. 185 // buckets[0, 4, 5, ....] 186 // | | | 187 // | | +---+ 188 // | | | 189 // | +----+ | 190 // v v v 191 // entries[H,O,H,O,O,H,O,H,O.....] 192 // 193 // See CompactHashtable::lookup() for how the table is searched at runtime. 194 // See CompactHashtableWriter::dump() for how the table is written at CDS 195 // dump time. 196 // 197 class SimpleCompactHashtable { 198 protected: 199 address _base_address; 200 u4 _bucket_count; 201 u4 _entry_count; 202 u4* _buckets; 203 u4* _entries; 204 205 public: SimpleCompactHashtable()206 SimpleCompactHashtable() { 207 _entry_count = 0; 208 _bucket_count = 0; 209 _buckets = 0; 210 _entries = 0; 211 } 212 reset()213 void reset() { 214 _bucket_count = 0; 215 _entry_count = 0; 216 _buckets = 0; 217 _entries = 0; 218 } 219 init(address base_address,u4 entry_count,u4 bucket_count,u4 * buckets,u4 * entries)220 void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) { 221 _base_address = base_address; 222 _bucket_count = bucket_count; 223 _entry_count = entry_count; 224 _buckets = buckets; 225 _entries = entries; 226 } 227 228 template <class I> inline void iterate(const I& iterator); 229 230 bool exists(u4 value); 231 232 // For reading from/writing to the CDS archive 233 void serialize(SerializeClosure* soc); 234 }; 235 236 template <class T, class N> class CompactHashtable : public SimpleCompactHashtable { 237 friend class VMStructs; 238 239 public: 240 enum CompactHashtableType { 241 _symbol_table = 0, 242 _string_table = 1 243 }; 244 245 private: 246 u4 _type; 247 248 inline Symbol* decode_entry(CompactHashtable<Symbol*, char>* const t, 249 u4 offset, const char* name, int len); 250 251 inline oop decode_entry(CompactHashtable<oop, char>* const t, 252 u4 offset, const char* name, int len); 253 public: CompactHashtable()254 CompactHashtable() : SimpleCompactHashtable() {} 255 set_type(CompactHashtableType type)256 void set_type(CompactHashtableType type) { 257 _type = (u4)type; 258 } 259 260 // Lookup an entry from the compact table 261 inline T lookup(const N* name, unsigned int hash, int len); 262 263 // iterate over symbols 264 void symbols_do(SymbolClosure *cl); 265 266 // iterate over strings 267 void oops_do(OopClosure* f); 268 269 // For reading from/writing to the CDS archive 270 void serialize(SerializeClosure* soc); 271 base_address()272 uintx base_address() { 273 return (uintx) _base_address; 274 } 275 }; 276 277 //////////////////////////////////////////////////////////////////////// 278 // 279 // Read/Write the contents of a hashtable textual dump (created by 280 // SymbolTable::dump and StringTable::dump). 281 // Because the dump file may be big (hundred of MB in extreme cases), 282 // we use mmap for fast access when reading it. 283 // 284 class HashtableTextDump { 285 int _fd; 286 const char* _base; 287 const char* _p; 288 const char* _end; 289 const char* _filename; 290 size_t _size; 291 int _prefix_type; 292 int _line_no; 293 public: 294 HashtableTextDump(const char* filename); 295 ~HashtableTextDump(); 296 297 enum { 298 SymbolPrefix = 1 << 0, 299 StringPrefix = 1 << 1, 300 Unknown = 1 << 2 301 }; 302 303 void quit(const char* err, const char* msg); 304 remain()305 inline int remain() { 306 return (int)(_end - _p); 307 } 308 309 void corrupted(const char *p, const char *msg); 310 corrupted_if(bool cond,const char * msg)311 inline void corrupted_if(bool cond, const char *msg) { 312 if (cond) { 313 corrupted(_p, msg); 314 } 315 } 316 317 bool skip_newline(); 318 int skip(char must_be_char); 319 void skip_past(char c); 320 void check_version(const char* ver); 321 get_num(char delim,int * num)322 inline void get_num(char delim, int *num) { 323 const char* p = _p; 324 const char* end = _end; 325 u8 n = 0; 326 327 while (p < end) { 328 char c = *p++; 329 if ('0' <= c && c <= '9') { 330 n = n * 10 + (c - '0'); 331 if (n > (u8)INT_MAX) { 332 corrupted(_p, "Num overflow"); 333 } 334 } else if (c == delim) { 335 _p = p; 336 *num = (int)n; 337 return; 338 } else { 339 // Not [0-9], not 'delim' 340 corrupted(_p, "Unrecognized format");; 341 } 342 } 343 344 corrupted(_end, "Incorrect format"); 345 ShouldNotReachHere(); 346 } 347 348 void scan_prefix_type(); 349 int scan_prefix(int* utf8_length); 350 int scan_string_prefix(); 351 int scan_symbol_prefix(); 352 353 jchar unescape(const char* from, const char* end, int count); 354 void get_utf8(char* utf8_buffer, int utf8_length); 355 static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length); 356 }; 357 358 #endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP 359