1 /*
2  * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include "precompiled.hpp"
26 #include "jvm.h"
27 #include "classfile/compactHashtable.inline.hpp"
28 #include "classfile/javaClasses.hpp"
29 #include "logging/logMessage.hpp"
30 #include "memory/heapShared.inline.hpp"
31 #include "memory/metadataFactory.hpp"
32 #include "memory/metaspaceShared.hpp"
33 #include "oops/compressedOops.inline.hpp"
34 #include "runtime/vmThread.hpp"
35 #include "utilities/numberSeq.hpp"
36 #include <sys/stat.h>
37 
38 /////////////////////////////////////////////////////
39 //
40 // The compact hash table writer implementations
41 //
CompactHashtableWriter(int num_buckets,CompactHashtableStats * stats)42 CompactHashtableWriter::CompactHashtableWriter(int num_buckets,
43                                                CompactHashtableStats* stats) {
44   assert(DumpSharedSpaces, "dump-time only");
45   assert(num_buckets > 0, "no buckets");
46   _num_buckets = num_buckets;
47   _num_entries = 0;
48   _buckets = NEW_C_HEAP_ARRAY(GrowableArray<Entry>*, _num_buckets, mtSymbol);
49   for (int i=0; i<_num_buckets; i++) {
50     _buckets[i] = new (ResourceObj::C_HEAP, mtSymbol) GrowableArray<Entry>(0, true, mtSymbol);
51   }
52 
53   _stats = stats;
54   _compact_buckets = NULL;
55   _compact_entries = NULL;
56   _num_empty_buckets = 0;
57   _num_value_only_buckets = 0;
58   _num_other_buckets = 0;
59 }
60 
~CompactHashtableWriter()61 CompactHashtableWriter::~CompactHashtableWriter() {
62   for (int index = 0; index < _num_buckets; index++) {
63     GrowableArray<Entry>* bucket = _buckets[index];
64     delete bucket;
65   }
66 
67   FREE_C_HEAP_ARRAY(GrowableArray<Entry>*, _buckets);
68 }
69 
70 // Add a symbol entry to the temporary hash table
add(unsigned int hash,u4 value)71 void CompactHashtableWriter::add(unsigned int hash, u4 value) {
72   int index = hash % _num_buckets;
73   _buckets[index]->append_if_missing(Entry(hash, value));
74   _num_entries++;
75 }
76 
allocate_table()77 void CompactHashtableWriter::allocate_table() {
78   int entries_space = 0;
79   for (int index = 0; index < _num_buckets; index++) {
80     GrowableArray<Entry>* bucket = _buckets[index];
81     int bucket_size = bucket->length();
82     if (bucket_size == 1) {
83       entries_space++;
84     } else {
85       entries_space += 2 * bucket_size;
86     }
87   }
88 
89   if (entries_space & ~BUCKET_OFFSET_MASK) {
90     vm_exit_during_initialization("CompactHashtableWriter::allocate_table: Overflow! "
91                                   "Too many entries.");
92   }
93 
94   _compact_buckets = MetaspaceShared::new_ro_array<u4>(_num_buckets + 1);
95   _compact_entries = MetaspaceShared::new_ro_array<u4>(entries_space);
96 
97   _stats->bucket_count    = _num_buckets;
98   _stats->bucket_bytes    = _compact_buckets->size() * BytesPerWord;
99   _stats->hashentry_count = _num_entries;
100   _stats->hashentry_bytes = _compact_entries->size() * BytesPerWord;
101 }
102 
103 // Write the compact table's buckets
dump_table(NumberSeq * summary)104 void CompactHashtableWriter::dump_table(NumberSeq* summary) {
105   u4 offset = 0;
106   for (int index = 0; index < _num_buckets; index++) {
107     GrowableArray<Entry>* bucket = _buckets[index];
108     int bucket_size = bucket->length();
109     if (bucket_size == 1) {
110       // bucket with one entry is compacted and only has the symbol offset
111       _compact_buckets->at_put(index, BUCKET_INFO(offset, VALUE_ONLY_BUCKET_TYPE));
112 
113       Entry ent = bucket->at(0);
114       _compact_entries->at_put(offset++, ent.value());
115       _num_value_only_buckets++;
116     } else {
117       // regular bucket, each entry is a symbol (hash, offset) pair
118       _compact_buckets->at_put(index, BUCKET_INFO(offset, REGULAR_BUCKET_TYPE));
119 
120       for (int i=0; i<bucket_size; i++) {
121         Entry ent = bucket->at(i);
122         _compact_entries->at_put(offset++, u4(ent.hash())); // write entry hash
123         _compact_entries->at_put(offset++, ent.value());
124       }
125       if (bucket_size == 0) {
126         _num_empty_buckets++;
127       } else {
128         _num_other_buckets++;
129       }
130     }
131     summary->add(bucket_size);
132   }
133 
134   // Mark the end of the buckets
135   _compact_buckets->at_put(_num_buckets, BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE));
136   assert(offset == (u4)_compact_entries->length(), "sanity");
137 }
138 
139 
140 // Write the compact table
dump(SimpleCompactHashtable * cht,const char * table_name)141 void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table_name) {
142   NumberSeq summary;
143   allocate_table();
144   dump_table(&summary);
145 
146   int table_bytes = _stats->bucket_bytes + _stats->hashentry_bytes;
147   address base_address = address(MetaspaceShared::shared_rs()->base());
148   cht->init(base_address,  _num_entries, _num_buckets,
149             _compact_buckets->data(), _compact_entries->data());
150 
151   LogMessage(cds, hashtables) msg;
152   if (msg.is_info()) {
153     double avg_cost = 0.0;
154     if (_num_entries > 0) {
155       avg_cost = double(table_bytes)/double(_num_entries);
156     }
157     msg.info("Shared %s table stats -------- base: " PTR_FORMAT,
158                          table_name, (intptr_t)base_address);
159     msg.info("Number of entries       : %9d", _num_entries);
160     msg.info("Total bytes used        : %9d", table_bytes);
161     msg.info("Average bytes per entry : %9.3f", avg_cost);
162     msg.info("Average bucket size     : %9.3f", summary.avg());
163     msg.info("Variance of bucket size : %9.3f", summary.variance());
164     msg.info("Std. dev. of bucket size: %9.3f", summary.sd());
165     msg.info("Empty buckets           : %9d", _num_empty_buckets);
166     msg.info("Value_Only buckets      : %9d", _num_value_only_buckets);
167     msg.info("Other buckets           : %9d", _num_other_buckets);
168   }
169 }
170 
171 /////////////////////////////////////////////////////////////
172 //
173 // Customization for dumping Symbol and String tables
174 
add(unsigned int hash,Symbol * symbol)175 void CompactSymbolTableWriter::add(unsigned int hash, Symbol *symbol) {
176   uintx deltax = MetaspaceShared::object_delta(symbol);
177   // When the symbols are stored into the archive, we already check that
178   // they won't be more than MAX_SHARED_DELTA from the base address, or
179   // else the dumping would have been aborted.
180   assert(deltax <= MAX_SHARED_DELTA, "must not be");
181   u4 delta = u4(deltax);
182 
183   CompactHashtableWriter::add(hash, delta);
184 }
185 
add(unsigned int hash,oop string)186 void CompactStringTableWriter::add(unsigned int hash, oop string) {
187   CompactHashtableWriter::add(hash, CompressedOops::encode(string));
188 }
189 
dump(CompactHashtable<Symbol *,char> * cht)190 void CompactSymbolTableWriter::dump(CompactHashtable<Symbol*, char> *cht) {
191   CompactHashtableWriter::dump(cht, "symbol");
192 }
193 
dump(CompactHashtable<oop,char> * cht)194 void CompactStringTableWriter::dump(CompactHashtable<oop, char> *cht) {
195   CompactHashtableWriter::dump(cht, "string");
196 }
197 
198 /////////////////////////////////////////////////////////////
199 //
200 // The CompactHashtable implementation
201 //
202 
serialize(SerializeClosure * soc)203 void SimpleCompactHashtable::serialize(SerializeClosure* soc) {
204   soc->do_ptr((void**)&_base_address);
205   soc->do_u4(&_entry_count);
206   soc->do_u4(&_bucket_count);
207   soc->do_ptr((void**)&_buckets);
208   soc->do_ptr((void**)&_entries);
209 }
210 
exists(u4 value)211 bool SimpleCompactHashtable::exists(u4 value) {
212   assert(!DumpSharedSpaces, "run-time only");
213 
214   if (_entry_count == 0) {
215     return false;
216   }
217 
218   unsigned int hash = (unsigned int)value;
219   int index = hash % _bucket_count;
220   u4 bucket_info = _buckets[index];
221   u4 bucket_offset = BUCKET_OFFSET(bucket_info);
222   int bucket_type = BUCKET_TYPE(bucket_info);
223   u4* entry = _entries + bucket_offset;
224 
225   if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
226     return (entry[0] == value);
227   } else {
228     u4*entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]);
229     while (entry <entry_max) {
230       if (entry[1] == value) {
231         return true;
232       }
233       entry += 2;
234     }
235     return false;
236   }
237 }
238 
239 template <class I>
iterate(const I & iterator)240 inline void SimpleCompactHashtable::iterate(const I& iterator) {
241   for (u4 i = 0; i < _bucket_count; i++) {
242     u4 bucket_info = _buckets[i];
243     u4 bucket_offset = BUCKET_OFFSET(bucket_info);
244     int bucket_type = BUCKET_TYPE(bucket_info);
245     u4* entry = _entries + bucket_offset;
246 
247     if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
248       iterator.do_value(_base_address, entry[0]);
249     } else {
250       u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]);
251       while (entry < entry_max) {
252         iterator.do_value(_base_address, entry[1]);
253         entry += 2;
254       }
255     }
256   }
257 }
258 
serialize(SerializeClosure * soc)259 template <class T, class N> void CompactHashtable<T, N>::serialize(SerializeClosure* soc) {
260   SimpleCompactHashtable::serialize(soc);
261   soc->do_u4(&_type);
262 }
263 
264 class CompactHashtable_SymbolIterator {
265   SymbolClosure* const _closure;
266 public:
CompactHashtable_SymbolIterator(SymbolClosure * cl)267   CompactHashtable_SymbolIterator(SymbolClosure *cl) : _closure(cl) {}
do_value(address base_address,u4 offset) const268   inline void do_value(address base_address, u4 offset) const {
269     Symbol* sym = (Symbol*)((void*)(base_address + offset));
270     _closure->do_symbol(&sym);
271   }
272 };
273 
symbols_do(SymbolClosure * cl)274 template <class T, class N> void CompactHashtable<T, N>::symbols_do(SymbolClosure *cl) {
275   CompactHashtable_SymbolIterator iterator(cl);
276   iterate(iterator);
277 }
278 
279 class CompactHashtable_OopIterator {
280   OopClosure* const _closure;
281 public:
CompactHashtable_OopIterator(OopClosure * cl)282   CompactHashtable_OopIterator(OopClosure *cl) : _closure(cl) {}
do_value(address base_address,u4 offset) const283   inline void do_value(address base_address, u4 offset) const {
284     narrowOop v = (narrowOop)offset;
285     oop obj = HeapShared::decode_from_archive(v);
286     _closure->do_oop(&obj);
287   }
288 };
289 
oops_do(OopClosure * cl)290 template <class T, class N> void CompactHashtable<T, N>::oops_do(OopClosure* cl) {
291   assert(_type == _string_table || _bucket_count == 0, "sanity");
292   CompactHashtable_OopIterator iterator(cl);
293   iterate(iterator);
294 }
295 
296 // Explicitly instantiate these types
297 template class CompactHashtable<Symbol*, char>;
298 template class CompactHashtable<oop, char>;
299 
300 #ifndef O_BINARY       // if defined (Win32) use binary files.
301 #define O_BINARY 0     // otherwise do nothing.
302 #endif
303 
304 ////////////////////////////////////////////////////////
305 //
306 // HashtableTextDump
307 //
HashtableTextDump(const char * filename)308 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
309   struct stat st;
310   if (os::stat(filename, &st) != 0) {
311     quit("Unable to get hashtable dump file size", filename);
312   }
313   _size = st.st_size;
314   _fd = os::open(filename, O_RDONLY | O_BINARY, 0);
315   if (_fd < 0) {
316     quit("Unable to open hashtable dump file", filename);
317   }
318   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
319   if (_base == NULL) {
320     quit("Unable to map hashtable dump file", filename);
321   }
322   _p = _base;
323   _end = _base + st.st_size;
324   _filename = filename;
325   _prefix_type = Unknown;
326   _line_no = 1;
327 }
328 
~HashtableTextDump()329 HashtableTextDump::~HashtableTextDump() {
330   os::unmap_memory((char*)_base, _size);
331   if (_fd >= 0) {
332     close(_fd);
333   }
334 }
335 
quit(const char * err,const char * msg)336 void HashtableTextDump::quit(const char* err, const char* msg) {
337   vm_exit_during_initialization(err, msg);
338 }
339 
corrupted(const char * p,const char * msg)340 void HashtableTextDump::corrupted(const char *p, const char* msg) {
341   char info[100];
342   jio_snprintf(info, sizeof(info),
343                "%s. Corrupted at line %d (file pos %d)",
344                msg, _line_no, (int)(p - _base));
345   quit(info, _filename);
346 }
347 
skip_newline()348 bool HashtableTextDump::skip_newline() {
349   if (_p[0] == '\r' && _p[1] == '\n') {
350     _p += 2;
351   } else if (_p[0] == '\n') {
352     _p += 1;
353   } else {
354     corrupted(_p, "Unexpected character");
355   }
356   _line_no++;
357   return true;
358 }
359 
skip(char must_be_char)360 int HashtableTextDump::skip(char must_be_char) {
361   corrupted_if(remain() < 1, "Truncated");
362   corrupted_if(*_p++ != must_be_char, "Unexpected character");
363   return 0;
364 }
365 
skip_past(char c)366 void HashtableTextDump::skip_past(char c) {
367   for (;;) {
368     corrupted_if(remain() < 1, "Truncated");
369     if (*_p++ == c) {
370       return;
371     }
372   }
373 }
374 
check_version(const char * ver)375 void HashtableTextDump::check_version(const char* ver) {
376   int len = (int)strlen(ver);
377   corrupted_if(remain() < len, "Truncated");
378   if (strncmp(_p, ver, len) != 0) {
379     quit("wrong version of hashtable dump file", _filename);
380   }
381   _p += len;
382   skip_newline();
383 }
384 
scan_prefix_type()385 void HashtableTextDump::scan_prefix_type() {
386   _p++;
387   if (strncmp(_p, "SECTION: String", 15) == 0) {
388     _p += 15;
389     _prefix_type = StringPrefix;
390   } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
391     _p += 15;
392     _prefix_type = SymbolPrefix;
393   } else {
394     _prefix_type = Unknown;
395   }
396   skip_newline();
397 }
398 
scan_prefix(int * utf8_length)399 int HashtableTextDump::scan_prefix(int* utf8_length) {
400   if (*_p == '@') {
401     scan_prefix_type();
402   }
403 
404   switch (_prefix_type) {
405   case SymbolPrefix:
406     *utf8_length = scan_symbol_prefix(); break;
407   case StringPrefix:
408     *utf8_length = scan_string_prefix(); break;
409   default:
410     tty->print_cr("Shared input data type: Unknown.");
411     corrupted(_p, "Unknown data type");
412   }
413 
414   return _prefix_type;
415 }
416 
scan_string_prefix()417 int HashtableTextDump::scan_string_prefix() {
418   // Expect /[0-9]+: /
419   int utf8_length = 0;
420   get_num(':', &utf8_length);
421   if (*_p != ' ') {
422     corrupted(_p, "Wrong prefix format for string");
423   }
424   _p++;
425   return utf8_length;
426 }
427 
scan_symbol_prefix()428 int HashtableTextDump::scan_symbol_prefix() {
429   // Expect /[0-9]+ (-|)[0-9]+: /
430   int utf8_length = 0;
431   get_num(' ', &utf8_length);
432   if (*_p == '-') {
433     _p++;
434   }
435   int ref_num;
436   get_num(':', &ref_num);
437   if (*_p != ' ') {
438     corrupted(_p, "Wrong prefix format for symbol");
439   }
440   _p++;
441   return utf8_length;
442 }
443 
unescape(const char * from,const char * end,int count)444 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
445   jchar value = 0;
446 
447   corrupted_if(from + count > end, "Truncated");
448 
449   for (int i=0; i<count; i++) {
450     char c = *from++;
451     switch (c) {
452     case '0': case '1': case '2': case '3': case '4':
453     case '5': case '6': case '7': case '8': case '9':
454       value = (value << 4) + c - '0';
455       break;
456     case 'a': case 'b': case 'c':
457     case 'd': case 'e': case 'f':
458       value = (value << 4) + 10 + c - 'a';
459       break;
460     case 'A': case 'B': case 'C':
461     case 'D': case 'E': case 'F':
462       value = (value << 4) + 10 + c - 'A';
463       break;
464     default:
465       ShouldNotReachHere();
466     }
467   }
468   return value;
469 }
470 
get_utf8(char * utf8_buffer,int utf8_length)471 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
472   // cache in local vars
473   const char* from = _p;
474   const char* end = _end;
475   char* to = utf8_buffer;
476   int n = utf8_length;
477 
478   for (; n > 0 && from < end; n--) {
479     if (*from != '\\') {
480       *to++ = *from++;
481     } else {
482       corrupted_if(from + 2 > end, "Truncated");
483       char c = from[1];
484       from += 2;
485       switch (c) {
486       case 'x':
487         {
488           jchar value = unescape(from, end, 2);
489           from += 2;
490           assert(value <= 0xff, "sanity");
491           *to++ = (char)(value & 0xff);
492         }
493         break;
494       case 't':  *to++ = '\t'; break;
495       case 'n':  *to++ = '\n'; break;
496       case 'r':  *to++ = '\r'; break;
497       case '\\': *to++ = '\\'; break;
498       default:
499         corrupted(_p, "Unsupported character");
500       }
501     }
502   }
503   corrupted_if(n > 0, "Truncated"); // expected more chars but file has ended
504   _p = from;
505   skip_newline();
506 }
507 
508 // NOTE: the content is NOT the same as
509 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
510 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
511 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
put_utf8(outputStream * st,const char * utf8_string,int utf8_length)512 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
513   const char *c = utf8_string;
514   const char *end = c + utf8_length;
515   for (; c < end; c++) {
516     switch (*c) {
517     case '\t': st->print("\\t"); break;
518     case '\r': st->print("\\r"); break;
519     case '\n': st->print("\\n"); break;
520     case '\\': st->print("\\\\"); break;
521     default:
522       if (isprint(*c)) {
523         st->print("%c", *c);
524       } else {
525         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
526       }
527     }
528   }
529 }
530