1 /*
2  * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include "precompiled.hpp"
26 #include "jvm.h"
27 #include "classfile/compactHashtable.hpp"
28 #include "classfile/javaClasses.hpp"
29 #include "logging/logMessage.hpp"
30 #include "memory/dynamicArchive.hpp"
31 #include "memory/heapShared.inline.hpp"
32 #include "memory/metadataFactory.hpp"
33 #include "memory/metaspaceShared.hpp"
34 #include "runtime/arguments.hpp"
35 #include "runtime/globals.hpp"
36 #include "runtime/vmThread.hpp"
37 #include "utilities/numberSeq.hpp"
38 #include <sys/stat.h>
39 
40 #if INCLUDE_CDS
41 /////////////////////////////////////////////////////
42 //
43 // The compact hash table writer implementations
44 //
CompactHashtableWriter(int num_entries,CompactHashtableStats * stats)45 CompactHashtableWriter::CompactHashtableWriter(int num_entries,
46                                                CompactHashtableStats* stats) {
47   Arguments::assert_is_dumping_archive();
48   assert(num_entries >= 0, "sanity");
49   _num_buckets = calculate_num_buckets(num_entries);
50   assert(_num_buckets > 0, "no buckets");
51 
52   _num_entries_written = 0;
53   _buckets = NEW_C_HEAP_ARRAY(GrowableArray<Entry>*, _num_buckets, mtSymbol);
54   for (int i=0; i<_num_buckets; i++) {
55     _buckets[i] = new (ResourceObj::C_HEAP, mtSymbol) GrowableArray<Entry>(0, mtSymbol);
56   }
57 
58   _stats = stats;
59   _compact_buckets = NULL;
60   _compact_entries = NULL;
61   _num_empty_buckets = 0;
62   _num_value_only_buckets = 0;
63   _num_other_buckets = 0;
64 }
65 
~CompactHashtableWriter()66 CompactHashtableWriter::~CompactHashtableWriter() {
67   for (int index = 0; index < _num_buckets; index++) {
68     GrowableArray<Entry>* bucket = _buckets[index];
69     delete bucket;
70   }
71 
72   FREE_C_HEAP_ARRAY(GrowableArray<Entry>*, _buckets);
73 }
74 
estimate_size(int num_entries)75 size_t CompactHashtableWriter::estimate_size(int num_entries) {
76   int num_buckets = calculate_num_buckets(num_entries);
77   size_t bucket_bytes = MetaspaceShared::ro_array_bytesize<u4>(num_buckets + 1);
78 
79   // In worst case, we have no VALUE_ONLY_BUCKET_TYPE, so each entry takes 2 slots
80   int entries_space = 2 * num_entries;
81   size_t entry_bytes = MetaspaceShared::ro_array_bytesize<u4>(entries_space);
82 
83   return bucket_bytes
84        + entry_bytes
85        + SimpleCompactHashtable::calculate_header_size();
86 }
87 
88 // Add a symbol entry to the temporary hash table
add(unsigned int hash,u4 value)89 void CompactHashtableWriter::add(unsigned int hash, u4 value) {
90   int index = hash % _num_buckets;
91   _buckets[index]->append_if_missing(Entry(hash, value));
92   _num_entries_written++;
93 }
94 
allocate_table()95 void CompactHashtableWriter::allocate_table() {
96   int entries_space = 0;
97   for (int index = 0; index < _num_buckets; index++) {
98     GrowableArray<Entry>* bucket = _buckets[index];
99     int bucket_size = bucket->length();
100     if (bucket_size == 1) {
101       entries_space++;
102     } else if (bucket_size > 1) {
103       entries_space += 2 * bucket_size;
104     }
105   }
106 
107   if (entries_space & ~BUCKET_OFFSET_MASK) {
108     vm_exit_during_initialization("CompactHashtableWriter::allocate_table: Overflow! "
109                                   "Too many entries.");
110   }
111 
112   _compact_buckets = MetaspaceShared::new_ro_array<u4>(_num_buckets + 1);
113   _compact_entries = MetaspaceShared::new_ro_array<u4>(entries_space);
114 
115   _stats->bucket_count    = _num_buckets;
116   _stats->bucket_bytes    = align_up(_compact_buckets->size() * BytesPerWord,
117                                      SharedSpaceObjectAlignment);
118   _stats->hashentry_count = _num_entries_written;
119   _stats->hashentry_bytes = align_up(_compact_entries->size() * BytesPerWord,
120                                      SharedSpaceObjectAlignment);
121 }
122 
123 // Write the compact table's buckets
dump_table(NumberSeq * summary)124 void CompactHashtableWriter::dump_table(NumberSeq* summary) {
125   u4 offset = 0;
126   for (int index = 0; index < _num_buckets; index++) {
127     GrowableArray<Entry>* bucket = _buckets[index];
128     int bucket_size = bucket->length();
129     if (bucket_size == 1) {
130       // bucket with one entry is compacted and only has the symbol offset
131       _compact_buckets->at_put(index, BUCKET_INFO(offset, VALUE_ONLY_BUCKET_TYPE));
132 
133       Entry ent = bucket->at(0);
134       _compact_entries->at_put(offset++, ent.value());
135       _num_value_only_buckets++;
136     } else {
137       // regular bucket, each entry is a symbol (hash, offset) pair
138       _compact_buckets->at_put(index, BUCKET_INFO(offset, REGULAR_BUCKET_TYPE));
139 
140       for (int i=0; i<bucket_size; i++) {
141         Entry ent = bucket->at(i);
142         _compact_entries->at_put(offset++, u4(ent.hash())); // write entry hash
143         _compact_entries->at_put(offset++, ent.value());
144       }
145       if (bucket_size == 0) {
146         _num_empty_buckets++;
147       } else {
148         _num_other_buckets++;
149       }
150     }
151     summary->add(bucket_size);
152   }
153 
154   // Mark the end of the buckets
155   _compact_buckets->at_put(_num_buckets, BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE));
156   assert(offset == (u4)_compact_entries->length(), "sanity");
157 }
158 
159 
160 // Write the compact table
dump(SimpleCompactHashtable * cht,const char * table_name)161 void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table_name) {
162   NumberSeq summary;
163   allocate_table();
164   dump_table(&summary);
165 
166   int table_bytes = _stats->bucket_bytes + _stats->hashentry_bytes;
167   address base_address = address(SharedBaseAddress);
168   cht->init(base_address,  _num_entries_written, _num_buckets,
169             _compact_buckets->data(), _compact_entries->data());
170 
171   LogMessage(cds, hashtables) msg;
172   if (msg.is_info()) {
173     double avg_cost = 0.0;
174     if (_num_entries_written > 0) {
175       avg_cost = double(table_bytes)/double(_num_entries_written);
176     }
177     msg.info("Shared %s table stats -------- base: " PTR_FORMAT,
178                          table_name, (intptr_t)base_address);
179     msg.info("Number of entries       : %9d", _num_entries_written);
180     msg.info("Total bytes used        : %9d", table_bytes);
181     msg.info("Average bytes per entry : %9.3f", avg_cost);
182     msg.info("Average bucket size     : %9.3f", summary.avg());
183     msg.info("Variance of bucket size : %9.3f", summary.variance());
184     msg.info("Std. dev. of bucket size: %9.3f", summary.sd());
185     msg.info("Maximum bucket size     : %9d", (int)summary.maximum());
186     msg.info("Empty buckets           : %9d", _num_empty_buckets);
187     msg.info("Value_Only buckets      : %9d", _num_value_only_buckets);
188     msg.info("Other buckets           : %9d", _num_other_buckets);
189   }
190 }
191 
192 /////////////////////////////////////////////////////////////
193 //
194 // The CompactHashtable implementation
195 //
196 
init(address base_address,u4 entry_count,u4 bucket_count,u4 * buckets,u4 * entries)197 void SimpleCompactHashtable::init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) {
198   _bucket_count = bucket_count;
199   _entry_count = entry_count;
200   _base_address = base_address;
201   if (DynamicDumpSharedSpaces) {
202     _buckets = DynamicArchive::buffer_to_target(buckets);
203     _entries = DynamicArchive::buffer_to_target(entries);
204   } else {
205     _buckets = buckets;
206     _entries = entries;
207   }
208 }
209 
calculate_header_size()210 size_t SimpleCompactHashtable::calculate_header_size() {
211   // We have 5 fields. Each takes up sizeof(intptr_t). See WriteClosure::do_u4
212   size_t bytes = sizeof(intptr_t) * 5;
213   return bytes;
214 }
215 
serialize_header(SerializeClosure * soc)216 void SimpleCompactHashtable::serialize_header(SerializeClosure* soc) {
217   // NOTE: if you change this function, you MUST change the number 5 in
218   // calculate_header_size() accordingly.
219   soc->do_u4(&_entry_count);
220   soc->do_u4(&_bucket_count);
221   soc->do_ptr((void**)&_buckets);
222   soc->do_ptr((void**)&_entries);
223   if (soc->reading()) {
224     _base_address = (address)SharedBaseAddress;
225   }
226 }
227 #endif // INCLUDE_CDS
228 
229 #ifndef O_BINARY       // if defined (Win32) use binary files.
230 #define O_BINARY 0     // otherwise do nothing.
231 #endif
232 
233 ////////////////////////////////////////////////////////
234 //
235 // HashtableTextDump
236 //
HashtableTextDump(const char * filename)237 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
238   struct stat st;
239   if (os::stat(filename, &st) != 0) {
240     quit("Unable to get hashtable dump file size", filename);
241   }
242   _size = st.st_size;
243   _fd = os::open(filename, O_RDONLY | O_BINARY, 0);
244   if (_fd < 0) {
245     quit("Unable to open hashtable dump file", filename);
246   }
247   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
248   if (_base == NULL) {
249     quit("Unable to map hashtable dump file", filename);
250   }
251   _p = _base;
252   _end = _base + st.st_size;
253   _filename = filename;
254   _prefix_type = Unknown;
255   _line_no = 1;
256 }
257 
~HashtableTextDump()258 HashtableTextDump::~HashtableTextDump() {
259   os::unmap_memory((char*)_base, _size);
260   if (_fd >= 0) {
261     close(_fd);
262   }
263 }
264 
quit(const char * err,const char * msg)265 void HashtableTextDump::quit(const char* err, const char* msg) {
266   vm_exit_during_initialization(err, msg);
267 }
268 
corrupted(const char * p,const char * msg)269 void HashtableTextDump::corrupted(const char *p, const char* msg) {
270   char info[100];
271   jio_snprintf(info, sizeof(info),
272                "%s. Corrupted at line %d (file pos %d)",
273                msg, _line_no, (int)(p - _base));
274   quit(info, _filename);
275 }
276 
skip_newline()277 bool HashtableTextDump::skip_newline() {
278   if (_p[0] == '\r' && _p[1] == '\n') {
279     _p += 2;
280   } else if (_p[0] == '\n') {
281     _p += 1;
282   } else {
283     corrupted(_p, "Unexpected character");
284   }
285   _line_no++;
286   return true;
287 }
288 
skip(char must_be_char)289 int HashtableTextDump::skip(char must_be_char) {
290   corrupted_if(remain() < 1, "Truncated");
291   corrupted_if(*_p++ != must_be_char, "Unexpected character");
292   return 0;
293 }
294 
skip_past(char c)295 void HashtableTextDump::skip_past(char c) {
296   for (;;) {
297     corrupted_if(remain() < 1, "Truncated");
298     if (*_p++ == c) {
299       return;
300     }
301   }
302 }
303 
check_version(const char * ver)304 void HashtableTextDump::check_version(const char* ver) {
305   int len = (int)strlen(ver);
306   corrupted_if(remain() < len, "Truncated");
307   if (strncmp(_p, ver, len) != 0) {
308     quit("wrong version of hashtable dump file", _filename);
309   }
310   _p += len;
311   skip_newline();
312 }
313 
scan_prefix_type()314 void HashtableTextDump::scan_prefix_type() {
315   _p++;
316   if (strncmp(_p, "SECTION: String", 15) == 0) {
317     _p += 15;
318     _prefix_type = StringPrefix;
319   } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
320     _p += 15;
321     _prefix_type = SymbolPrefix;
322   } else {
323     _prefix_type = Unknown;
324   }
325   skip_newline();
326 }
327 
scan_prefix(int * utf8_length)328 int HashtableTextDump::scan_prefix(int* utf8_length) {
329   if (*_p == '@') {
330     scan_prefix_type();
331   }
332 
333   switch (_prefix_type) {
334   case SymbolPrefix:
335     *utf8_length = scan_symbol_prefix(); break;
336   case StringPrefix:
337     *utf8_length = scan_string_prefix(); break;
338   default:
339     tty->print_cr("Shared input data type: Unknown.");
340     corrupted(_p, "Unknown data type");
341   }
342 
343   return _prefix_type;
344 }
345 
scan_string_prefix()346 int HashtableTextDump::scan_string_prefix() {
347   // Expect /[0-9]+: /
348   int utf8_length = 0;
349   get_num(':', &utf8_length);
350   if (*_p != ' ') {
351     corrupted(_p, "Wrong prefix format for string");
352   }
353   _p++;
354   return utf8_length;
355 }
356 
scan_symbol_prefix()357 int HashtableTextDump::scan_symbol_prefix() {
358   // Expect /[0-9]+ (-|)[0-9]+: /
359   int utf8_length = 0;
360   get_num(' ', &utf8_length);
361   if (*_p == '-') {
362     _p++;
363   }
364   int ref_num;
365   get_num(':', &ref_num);
366   if (*_p != ' ') {
367     corrupted(_p, "Wrong prefix format for symbol");
368   }
369   _p++;
370   return utf8_length;
371 }
372 
unescape(const char * from,const char * end,int count)373 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
374   jchar value = 0;
375 
376   corrupted_if(from + count > end, "Truncated");
377 
378   for (int i=0; i<count; i++) {
379     char c = *from++;
380     switch (c) {
381     case '0': case '1': case '2': case '3': case '4':
382     case '5': case '6': case '7': case '8': case '9':
383       value = (value << 4) + c - '0';
384       break;
385     case 'a': case 'b': case 'c':
386     case 'd': case 'e': case 'f':
387       value = (value << 4) + 10 + c - 'a';
388       break;
389     case 'A': case 'B': case 'C':
390     case 'D': case 'E': case 'F':
391       value = (value << 4) + 10 + c - 'A';
392       break;
393     default:
394       ShouldNotReachHere();
395     }
396   }
397   return value;
398 }
399 
get_utf8(char * utf8_buffer,int utf8_length)400 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
401   // cache in local vars
402   const char* from = _p;
403   const char* end = _end;
404   char* to = utf8_buffer;
405   int n = utf8_length;
406 
407   for (; n > 0 && from < end; n--) {
408     if (*from != '\\') {
409       *to++ = *from++;
410     } else {
411       corrupted_if(from + 2 > end, "Truncated");
412       char c = from[1];
413       from += 2;
414       switch (c) {
415       case 'x':
416         {
417           jchar value = unescape(from, end, 2);
418           from += 2;
419           assert(value <= 0xff, "sanity");
420           *to++ = (char)(value & 0xff);
421         }
422         break;
423       case 't':  *to++ = '\t'; break;
424       case 'n':  *to++ = '\n'; break;
425       case 'r':  *to++ = '\r'; break;
426       case '\\': *to++ = '\\'; break;
427       default:
428         corrupted(_p, "Unsupported character");
429       }
430     }
431   }
432   corrupted_if(n > 0, "Truncated"); // expected more chars but file has ended
433   _p = from;
434   skip_newline();
435 }
436 
437 // NOTE: the content is NOT the same as
438 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
439 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
440 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
put_utf8(outputStream * st,const char * utf8_string,int utf8_length)441 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
442   const char *c = utf8_string;
443   const char *end = c + utf8_length;
444   for (; c < end; c++) {
445     switch (*c) {
446     case '\t': st->print("\\t"); break;
447     case '\r': st->print("\\r"); break;
448     case '\n': st->print("\\n"); break;
449     case '\\': st->print("\\\\"); break;
450     default:
451       if (isprint(*c)) {
452         st->print("%c", *c);
453       } else {
454         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
455       }
456     }
457   }
458 }
459