1 /*
2 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "jvm.h"
27 #include "classfile/compactHashtable.hpp"
28 #include "classfile/javaClasses.hpp"
29 #include "logging/logMessage.hpp"
30 #include "memory/dynamicArchive.hpp"
31 #include "memory/heapShared.inline.hpp"
32 #include "memory/metadataFactory.hpp"
33 #include "memory/metaspaceShared.hpp"
34 #include "runtime/arguments.hpp"
35 #include "runtime/globals.hpp"
36 #include "runtime/vmThread.hpp"
37 #include "utilities/numberSeq.hpp"
38 #include <sys/stat.h>
39
40 #if INCLUDE_CDS
41 /////////////////////////////////////////////////////
42 //
43 // The compact hash table writer implementations
44 //
CompactHashtableWriter(int num_entries,CompactHashtableStats * stats)45 CompactHashtableWriter::CompactHashtableWriter(int num_entries,
46 CompactHashtableStats* stats) {
47 Arguments::assert_is_dumping_archive();
48 assert(num_entries >= 0, "sanity");
49 _num_buckets = calculate_num_buckets(num_entries);
50 assert(_num_buckets > 0, "no buckets");
51
52 _num_entries_written = 0;
53 _buckets = NEW_C_HEAP_ARRAY(GrowableArray<Entry>*, _num_buckets, mtSymbol);
54 for (int i=0; i<_num_buckets; i++) {
55 _buckets[i] = new (ResourceObj::C_HEAP, mtSymbol) GrowableArray<Entry>(0, mtSymbol);
56 }
57
58 _stats = stats;
59 _compact_buckets = NULL;
60 _compact_entries = NULL;
61 _num_empty_buckets = 0;
62 _num_value_only_buckets = 0;
63 _num_other_buckets = 0;
64 }
65
~CompactHashtableWriter()66 CompactHashtableWriter::~CompactHashtableWriter() {
67 for (int index = 0; index < _num_buckets; index++) {
68 GrowableArray<Entry>* bucket = _buckets[index];
69 delete bucket;
70 }
71
72 FREE_C_HEAP_ARRAY(GrowableArray<Entry>*, _buckets);
73 }
74
estimate_size(int num_entries)75 size_t CompactHashtableWriter::estimate_size(int num_entries) {
76 int num_buckets = calculate_num_buckets(num_entries);
77 size_t bucket_bytes = MetaspaceShared::ro_array_bytesize<u4>(num_buckets + 1);
78
79 // In worst case, we have no VALUE_ONLY_BUCKET_TYPE, so each entry takes 2 slots
80 int entries_space = 2 * num_entries;
81 size_t entry_bytes = MetaspaceShared::ro_array_bytesize<u4>(entries_space);
82
83 return bucket_bytes
84 + entry_bytes
85 + SimpleCompactHashtable::calculate_header_size();
86 }
87
88 // Add a symbol entry to the temporary hash table
add(unsigned int hash,u4 value)89 void CompactHashtableWriter::add(unsigned int hash, u4 value) {
90 int index = hash % _num_buckets;
91 _buckets[index]->append_if_missing(Entry(hash, value));
92 _num_entries_written++;
93 }
94
allocate_table()95 void CompactHashtableWriter::allocate_table() {
96 int entries_space = 0;
97 for (int index = 0; index < _num_buckets; index++) {
98 GrowableArray<Entry>* bucket = _buckets[index];
99 int bucket_size = bucket->length();
100 if (bucket_size == 1) {
101 entries_space++;
102 } else if (bucket_size > 1) {
103 entries_space += 2 * bucket_size;
104 }
105 }
106
107 if (entries_space & ~BUCKET_OFFSET_MASK) {
108 vm_exit_during_initialization("CompactHashtableWriter::allocate_table: Overflow! "
109 "Too many entries.");
110 }
111
112 _compact_buckets = MetaspaceShared::new_ro_array<u4>(_num_buckets + 1);
113 _compact_entries = MetaspaceShared::new_ro_array<u4>(entries_space);
114
115 _stats->bucket_count = _num_buckets;
116 _stats->bucket_bytes = align_up(_compact_buckets->size() * BytesPerWord,
117 SharedSpaceObjectAlignment);
118 _stats->hashentry_count = _num_entries_written;
119 _stats->hashentry_bytes = align_up(_compact_entries->size() * BytesPerWord,
120 SharedSpaceObjectAlignment);
121 }
122
123 // Write the compact table's buckets
dump_table(NumberSeq * summary)124 void CompactHashtableWriter::dump_table(NumberSeq* summary) {
125 u4 offset = 0;
126 for (int index = 0; index < _num_buckets; index++) {
127 GrowableArray<Entry>* bucket = _buckets[index];
128 int bucket_size = bucket->length();
129 if (bucket_size == 1) {
130 // bucket with one entry is compacted and only has the symbol offset
131 _compact_buckets->at_put(index, BUCKET_INFO(offset, VALUE_ONLY_BUCKET_TYPE));
132
133 Entry ent = bucket->at(0);
134 _compact_entries->at_put(offset++, ent.value());
135 _num_value_only_buckets++;
136 } else {
137 // regular bucket, each entry is a symbol (hash, offset) pair
138 _compact_buckets->at_put(index, BUCKET_INFO(offset, REGULAR_BUCKET_TYPE));
139
140 for (int i=0; i<bucket_size; i++) {
141 Entry ent = bucket->at(i);
142 _compact_entries->at_put(offset++, u4(ent.hash())); // write entry hash
143 _compact_entries->at_put(offset++, ent.value());
144 }
145 if (bucket_size == 0) {
146 _num_empty_buckets++;
147 } else {
148 _num_other_buckets++;
149 }
150 }
151 summary->add(bucket_size);
152 }
153
154 // Mark the end of the buckets
155 _compact_buckets->at_put(_num_buckets, BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE));
156 assert(offset == (u4)_compact_entries->length(), "sanity");
157 }
158
159
160 // Write the compact table
dump(SimpleCompactHashtable * cht,const char * table_name)161 void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table_name) {
162 NumberSeq summary;
163 allocate_table();
164 dump_table(&summary);
165
166 int table_bytes = _stats->bucket_bytes + _stats->hashentry_bytes;
167 address base_address = address(SharedBaseAddress);
168 cht->init(base_address, _num_entries_written, _num_buckets,
169 _compact_buckets->data(), _compact_entries->data());
170
171 LogMessage(cds, hashtables) msg;
172 if (msg.is_info()) {
173 double avg_cost = 0.0;
174 if (_num_entries_written > 0) {
175 avg_cost = double(table_bytes)/double(_num_entries_written);
176 }
177 msg.info("Shared %s table stats -------- base: " PTR_FORMAT,
178 table_name, (intptr_t)base_address);
179 msg.info("Number of entries : %9d", _num_entries_written);
180 msg.info("Total bytes used : %9d", table_bytes);
181 msg.info("Average bytes per entry : %9.3f", avg_cost);
182 msg.info("Average bucket size : %9.3f", summary.avg());
183 msg.info("Variance of bucket size : %9.3f", summary.variance());
184 msg.info("Std. dev. of bucket size: %9.3f", summary.sd());
185 msg.info("Maximum bucket size : %9d", (int)summary.maximum());
186 msg.info("Empty buckets : %9d", _num_empty_buckets);
187 msg.info("Value_Only buckets : %9d", _num_value_only_buckets);
188 msg.info("Other buckets : %9d", _num_other_buckets);
189 }
190 }
191
192 /////////////////////////////////////////////////////////////
193 //
194 // The CompactHashtable implementation
195 //
196
init(address base_address,u4 entry_count,u4 bucket_count,u4 * buckets,u4 * entries)197 void SimpleCompactHashtable::init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) {
198 _bucket_count = bucket_count;
199 _entry_count = entry_count;
200 _base_address = base_address;
201 if (DynamicDumpSharedSpaces) {
202 _buckets = DynamicArchive::buffer_to_target(buckets);
203 _entries = DynamicArchive::buffer_to_target(entries);
204 } else {
205 _buckets = buckets;
206 _entries = entries;
207 }
208 }
209
calculate_header_size()210 size_t SimpleCompactHashtable::calculate_header_size() {
211 // We have 5 fields. Each takes up sizeof(intptr_t). See WriteClosure::do_u4
212 size_t bytes = sizeof(intptr_t) * 5;
213 return bytes;
214 }
215
serialize_header(SerializeClosure * soc)216 void SimpleCompactHashtable::serialize_header(SerializeClosure* soc) {
217 // NOTE: if you change this function, you MUST change the number 5 in
218 // calculate_header_size() accordingly.
219 soc->do_u4(&_entry_count);
220 soc->do_u4(&_bucket_count);
221 soc->do_ptr((void**)&_buckets);
222 soc->do_ptr((void**)&_entries);
223 if (soc->reading()) {
224 _base_address = (address)SharedBaseAddress;
225 }
226 }
227 #endif // INCLUDE_CDS
228
229 #ifndef O_BINARY // if defined (Win32) use binary files.
230 #define O_BINARY 0 // otherwise do nothing.
231 #endif
232
233 ////////////////////////////////////////////////////////
234 //
235 // HashtableTextDump
236 //
HashtableTextDump(const char * filename)237 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
238 struct stat st;
239 if (os::stat(filename, &st) != 0) {
240 quit("Unable to get hashtable dump file size", filename);
241 }
242 _size = st.st_size;
243 _fd = os::open(filename, O_RDONLY | O_BINARY, 0);
244 if (_fd < 0) {
245 quit("Unable to open hashtable dump file", filename);
246 }
247 _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
248 if (_base == NULL) {
249 quit("Unable to map hashtable dump file", filename);
250 }
251 _p = _base;
252 _end = _base + st.st_size;
253 _filename = filename;
254 _prefix_type = Unknown;
255 _line_no = 1;
256 }
257
~HashtableTextDump()258 HashtableTextDump::~HashtableTextDump() {
259 os::unmap_memory((char*)_base, _size);
260 if (_fd >= 0) {
261 close(_fd);
262 }
263 }
264
quit(const char * err,const char * msg)265 void HashtableTextDump::quit(const char* err, const char* msg) {
266 vm_exit_during_initialization(err, msg);
267 }
268
corrupted(const char * p,const char * msg)269 void HashtableTextDump::corrupted(const char *p, const char* msg) {
270 char info[100];
271 jio_snprintf(info, sizeof(info),
272 "%s. Corrupted at line %d (file pos %d)",
273 msg, _line_no, (int)(p - _base));
274 quit(info, _filename);
275 }
276
skip_newline()277 bool HashtableTextDump::skip_newline() {
278 if (_p[0] == '\r' && _p[1] == '\n') {
279 _p += 2;
280 } else if (_p[0] == '\n') {
281 _p += 1;
282 } else {
283 corrupted(_p, "Unexpected character");
284 }
285 _line_no++;
286 return true;
287 }
288
skip(char must_be_char)289 int HashtableTextDump::skip(char must_be_char) {
290 corrupted_if(remain() < 1, "Truncated");
291 corrupted_if(*_p++ != must_be_char, "Unexpected character");
292 return 0;
293 }
294
skip_past(char c)295 void HashtableTextDump::skip_past(char c) {
296 for (;;) {
297 corrupted_if(remain() < 1, "Truncated");
298 if (*_p++ == c) {
299 return;
300 }
301 }
302 }
303
check_version(const char * ver)304 void HashtableTextDump::check_version(const char* ver) {
305 int len = (int)strlen(ver);
306 corrupted_if(remain() < len, "Truncated");
307 if (strncmp(_p, ver, len) != 0) {
308 quit("wrong version of hashtable dump file", _filename);
309 }
310 _p += len;
311 skip_newline();
312 }
313
scan_prefix_type()314 void HashtableTextDump::scan_prefix_type() {
315 _p++;
316 if (strncmp(_p, "SECTION: String", 15) == 0) {
317 _p += 15;
318 _prefix_type = StringPrefix;
319 } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
320 _p += 15;
321 _prefix_type = SymbolPrefix;
322 } else {
323 _prefix_type = Unknown;
324 }
325 skip_newline();
326 }
327
scan_prefix(int * utf8_length)328 int HashtableTextDump::scan_prefix(int* utf8_length) {
329 if (*_p == '@') {
330 scan_prefix_type();
331 }
332
333 switch (_prefix_type) {
334 case SymbolPrefix:
335 *utf8_length = scan_symbol_prefix(); break;
336 case StringPrefix:
337 *utf8_length = scan_string_prefix(); break;
338 default:
339 tty->print_cr("Shared input data type: Unknown.");
340 corrupted(_p, "Unknown data type");
341 }
342
343 return _prefix_type;
344 }
345
scan_string_prefix()346 int HashtableTextDump::scan_string_prefix() {
347 // Expect /[0-9]+: /
348 int utf8_length = 0;
349 get_num(':', &utf8_length);
350 if (*_p != ' ') {
351 corrupted(_p, "Wrong prefix format for string");
352 }
353 _p++;
354 return utf8_length;
355 }
356
scan_symbol_prefix()357 int HashtableTextDump::scan_symbol_prefix() {
358 // Expect /[0-9]+ (-|)[0-9]+: /
359 int utf8_length = 0;
360 get_num(' ', &utf8_length);
361 if (*_p == '-') {
362 _p++;
363 }
364 int ref_num;
365 get_num(':', &ref_num);
366 if (*_p != ' ') {
367 corrupted(_p, "Wrong prefix format for symbol");
368 }
369 _p++;
370 return utf8_length;
371 }
372
unescape(const char * from,const char * end,int count)373 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
374 jchar value = 0;
375
376 corrupted_if(from + count > end, "Truncated");
377
378 for (int i=0; i<count; i++) {
379 char c = *from++;
380 switch (c) {
381 case '0': case '1': case '2': case '3': case '4':
382 case '5': case '6': case '7': case '8': case '9':
383 value = (value << 4) + c - '0';
384 break;
385 case 'a': case 'b': case 'c':
386 case 'd': case 'e': case 'f':
387 value = (value << 4) + 10 + c - 'a';
388 break;
389 case 'A': case 'B': case 'C':
390 case 'D': case 'E': case 'F':
391 value = (value << 4) + 10 + c - 'A';
392 break;
393 default:
394 ShouldNotReachHere();
395 }
396 }
397 return value;
398 }
399
get_utf8(char * utf8_buffer,int utf8_length)400 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
401 // cache in local vars
402 const char* from = _p;
403 const char* end = _end;
404 char* to = utf8_buffer;
405 int n = utf8_length;
406
407 for (; n > 0 && from < end; n--) {
408 if (*from != '\\') {
409 *to++ = *from++;
410 } else {
411 corrupted_if(from + 2 > end, "Truncated");
412 char c = from[1];
413 from += 2;
414 switch (c) {
415 case 'x':
416 {
417 jchar value = unescape(from, end, 2);
418 from += 2;
419 assert(value <= 0xff, "sanity");
420 *to++ = (char)(value & 0xff);
421 }
422 break;
423 case 't': *to++ = '\t'; break;
424 case 'n': *to++ = '\n'; break;
425 case 'r': *to++ = '\r'; break;
426 case '\\': *to++ = '\\'; break;
427 default:
428 corrupted(_p, "Unsupported character");
429 }
430 }
431 }
432 corrupted_if(n > 0, "Truncated"); // expected more chars but file has ended
433 _p = from;
434 skip_newline();
435 }
436
437 // NOTE: the content is NOT the same as
438 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
439 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
440 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
put_utf8(outputStream * st,const char * utf8_string,int utf8_length)441 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
442 const char *c = utf8_string;
443 const char *end = c + utf8_length;
444 for (; c < end; c++) {
445 switch (*c) {
446 case '\t': st->print("\\t"); break;
447 case '\r': st->print("\\r"); break;
448 case '\n': st->print("\\n"); break;
449 case '\\': st->print("\\\\"); break;
450 default:
451 if (isprint(*c)) {
452 st->print("%c", *c);
453 } else {
454 st->print("\\x%02x", ((unsigned int)*c) & 0xff);
455 }
456 }
457 }
458 }
459