1 /*
2  * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include "precompiled.hpp"
26 #include "jvm.h"
27 #include "classfile/compactHashtable.hpp"
28 #include "classfile/javaClasses.hpp"
29 #include "logging/logMessage.hpp"
30 #include "memory/dynamicArchive.hpp"
31 #include "memory/heapShared.inline.hpp"
32 #include "memory/metadataFactory.hpp"
33 #include "memory/metaspaceShared.hpp"
34 #include "runtime/vmThread.hpp"
35 #include "utilities/numberSeq.hpp"
36 #include <sys/stat.h>
37 
38 #if INCLUDE_CDS
39 /////////////////////////////////////////////////////
40 //
41 // The compact hash table writer implementations
42 //
CompactHashtableWriter(int num_entries,CompactHashtableStats * stats)43 CompactHashtableWriter::CompactHashtableWriter(int num_entries,
44                                                CompactHashtableStats* stats) {
45   Arguments::assert_is_dumping_archive();
46   assert(num_entries >= 0, "sanity");
47   _num_buckets = calculate_num_buckets(num_entries);
48   assert(_num_buckets > 0, "no buckets");
49 
50   _num_entries_written = 0;
51   _buckets = NEW_C_HEAP_ARRAY(GrowableArray<Entry>*, _num_buckets, mtSymbol);
52   for (int i=0; i<_num_buckets; i++) {
53     _buckets[i] = new (ResourceObj::C_HEAP, mtSymbol) GrowableArray<Entry>(0, true, mtSymbol);
54   }
55 
56   _stats = stats;
57   _compact_buckets = NULL;
58   _compact_entries = NULL;
59   _num_empty_buckets = 0;
60   _num_value_only_buckets = 0;
61   _num_other_buckets = 0;
62 }
63 
~CompactHashtableWriter()64 CompactHashtableWriter::~CompactHashtableWriter() {
65   for (int index = 0; index < _num_buckets; index++) {
66     GrowableArray<Entry>* bucket = _buckets[index];
67     delete bucket;
68   }
69 
70   FREE_C_HEAP_ARRAY(GrowableArray<Entry>*, _buckets);
71 }
72 
estimate_size(int num_entries)73 size_t CompactHashtableWriter::estimate_size(int num_entries) {
74   int num_buckets = calculate_num_buckets(num_entries);
75   size_t bucket_bytes = MetaspaceShared::ro_array_bytesize<u4>(num_buckets + 1);
76 
77   // In worst case, we have no VALUE_ONLY_BUCKET_TYPE, so each entry takes 2 slots
78   int entries_space = 2 * num_entries;
79   size_t entry_bytes = MetaspaceShared::ro_array_bytesize<u4>(entries_space);
80 
81   return bucket_bytes
82        + entry_bytes
83        + SimpleCompactHashtable::calculate_header_size();
84 }
85 
86 // Add a symbol entry to the temporary hash table
add(unsigned int hash,u4 value)87 void CompactHashtableWriter::add(unsigned int hash, u4 value) {
88   int index = hash % _num_buckets;
89   _buckets[index]->append_if_missing(Entry(hash, value));
90   _num_entries_written++;
91 }
92 
allocate_table()93 void CompactHashtableWriter::allocate_table() {
94   int entries_space = 0;
95   for (int index = 0; index < _num_buckets; index++) {
96     GrowableArray<Entry>* bucket = _buckets[index];
97     int bucket_size = bucket->length();
98     if (bucket_size == 1) {
99       entries_space++;
100     } else if (bucket_size > 1) {
101       entries_space += 2 * bucket_size;
102     }
103   }
104 
105   if (entries_space & ~BUCKET_OFFSET_MASK) {
106     vm_exit_during_initialization("CompactHashtableWriter::allocate_table: Overflow! "
107                                   "Too many entries.");
108   }
109 
110   _compact_buckets = MetaspaceShared::new_ro_array<u4>(_num_buckets + 1);
111   _compact_entries = MetaspaceShared::new_ro_array<u4>(entries_space);
112 
113   _stats->bucket_count    = _num_buckets;
114   _stats->bucket_bytes    = _compact_buckets->size() * BytesPerWord;
115   _stats->hashentry_count = _num_entries_written;
116   _stats->hashentry_bytes = _compact_entries->size() * BytesPerWord;
117 }
118 
119 // Write the compact table's buckets
dump_table(NumberSeq * summary)120 void CompactHashtableWriter::dump_table(NumberSeq* summary) {
121   u4 offset = 0;
122   for (int index = 0; index < _num_buckets; index++) {
123     GrowableArray<Entry>* bucket = _buckets[index];
124     int bucket_size = bucket->length();
125     if (bucket_size == 1) {
126       // bucket with one entry is compacted and only has the symbol offset
127       _compact_buckets->at_put(index, BUCKET_INFO(offset, VALUE_ONLY_BUCKET_TYPE));
128 
129       Entry ent = bucket->at(0);
130       _compact_entries->at_put(offset++, ent.value());
131       _num_value_only_buckets++;
132     } else {
133       // regular bucket, each entry is a symbol (hash, offset) pair
134       _compact_buckets->at_put(index, BUCKET_INFO(offset, REGULAR_BUCKET_TYPE));
135 
136       for (int i=0; i<bucket_size; i++) {
137         Entry ent = bucket->at(i);
138         _compact_entries->at_put(offset++, u4(ent.hash())); // write entry hash
139         _compact_entries->at_put(offset++, ent.value());
140       }
141       if (bucket_size == 0) {
142         _num_empty_buckets++;
143       } else {
144         _num_other_buckets++;
145       }
146     }
147     summary->add(bucket_size);
148   }
149 
150   // Mark the end of the buckets
151   _compact_buckets->at_put(_num_buckets, BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE));
152   assert(offset == (u4)_compact_entries->length(), "sanity");
153 }
154 
155 
156 // Write the compact table
dump(SimpleCompactHashtable * cht,const char * table_name)157 void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table_name) {
158   NumberSeq summary;
159   allocate_table();
160   dump_table(&summary);
161 
162   int table_bytes = _stats->bucket_bytes + _stats->hashentry_bytes;
163   address base_address = address(SharedBaseAddress);
164   cht->init(base_address,  _num_entries_written, _num_buckets,
165             _compact_buckets->data(), _compact_entries->data());
166 
167   LogMessage(cds, hashtables) msg;
168   if (msg.is_info()) {
169     double avg_cost = 0.0;
170     if (_num_entries_written > 0) {
171       avg_cost = double(table_bytes)/double(_num_entries_written);
172     }
173     msg.info("Shared %s table stats -------- base: " PTR_FORMAT,
174                          table_name, (intptr_t)base_address);
175     msg.info("Number of entries       : %9d", _num_entries_written);
176     msg.info("Total bytes used        : %9d", table_bytes);
177     msg.info("Average bytes per entry : %9.3f", avg_cost);
178     msg.info("Average bucket size     : %9.3f", summary.avg());
179     msg.info("Variance of bucket size : %9.3f", summary.variance());
180     msg.info("Std. dev. of bucket size: %9.3f", summary.sd());
181     msg.info("Maximum bucket size     : %9d", (int)summary.maximum());
182     msg.info("Empty buckets           : %9d", _num_empty_buckets);
183     msg.info("Value_Only buckets      : %9d", _num_value_only_buckets);
184     msg.info("Other buckets           : %9d", _num_other_buckets);
185   }
186 }
187 
188 /////////////////////////////////////////////////////////////
189 //
190 // The CompactHashtable implementation
191 //
192 
init(address base_address,u4 entry_count,u4 bucket_count,u4 * buckets,u4 * entries)193 void SimpleCompactHashtable::init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) {
194   _bucket_count = bucket_count;
195   _entry_count = entry_count;
196   _base_address = base_address;
197   if (DynamicDumpSharedSpaces) {
198     _buckets = DynamicArchive::buffer_to_target(buckets);
199     _entries = DynamicArchive::buffer_to_target(entries);
200   } else {
201     _buckets = buckets;
202     _entries = entries;
203   }
204 }
205 
calculate_header_size()206 size_t SimpleCompactHashtable::calculate_header_size() {
207   // We have 5 fields. Each takes up sizeof(intptr_t). See WriteClosure::do_u4
208   size_t bytes = sizeof(intptr_t) * 5;
209   return bytes;
210 }
211 
serialize_header(SerializeClosure * soc)212 void SimpleCompactHashtable::serialize_header(SerializeClosure* soc) {
213   // NOTE: if you change this function, you MUST change the number 5 in
214   // calculate_header_size() accordingly.
215   soc->do_ptr((void**)&_base_address);
216   soc->do_u4(&_entry_count);
217   soc->do_u4(&_bucket_count);
218   soc->do_ptr((void**)&_buckets);
219   soc->do_ptr((void**)&_entries);
220 }
221 #endif // INCLUDE_CDS
222 
223 #ifndef O_BINARY       // if defined (Win32) use binary files.
224 #define O_BINARY 0     // otherwise do nothing.
225 #endif
226 
227 ////////////////////////////////////////////////////////
228 //
229 // HashtableTextDump
230 //
HashtableTextDump(const char * filename)231 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
232   struct stat st;
233   if (os::stat(filename, &st) != 0) {
234     quit("Unable to get hashtable dump file size", filename);
235   }
236   _size = st.st_size;
237   _fd = os::open(filename, O_RDONLY | O_BINARY, 0);
238   if (_fd < 0) {
239     quit("Unable to open hashtable dump file", filename);
240   }
241   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
242   if (_base == NULL) {
243     quit("Unable to map hashtable dump file", filename);
244   }
245   _p = _base;
246   _end = _base + st.st_size;
247   _filename = filename;
248   _prefix_type = Unknown;
249   _line_no = 1;
250 }
251 
~HashtableTextDump()252 HashtableTextDump::~HashtableTextDump() {
253   os::unmap_memory((char*)_base, _size);
254   if (_fd >= 0) {
255     close(_fd);
256   }
257 }
258 
// Reports a fatal problem with the dump file by forwarding both strings to
// vm_exit_during_initialization(). Callers in this file pass the error text
// as 'err' and the file name as 'msg'.
void HashtableTextDump::quit(const char* err, const char* msg) {
  vm_exit_during_initialization(err, msg);
}
262 
corrupted(const char * p,const char * msg)263 void HashtableTextDump::corrupted(const char *p, const char* msg) {
264   char info[100];
265   jio_snprintf(info, sizeof(info),
266                "%s. Corrupted at line %d (file pos %d)",
267                msg, _line_no, (int)(p - _base));
268   quit(info, _filename);
269 }
270 
skip_newline()271 bool HashtableTextDump::skip_newline() {
272   if (_p[0] == '\r' && _p[1] == '\n') {
273     _p += 2;
274   } else if (_p[0] == '\n') {
275     _p += 1;
276   } else {
277     corrupted(_p, "Unexpected character");
278   }
279   _line_no++;
280   return true;
281 }
282 
skip(char must_be_char)283 int HashtableTextDump::skip(char must_be_char) {
284   corrupted_if(remain() < 1, "Truncated");
285   corrupted_if(*_p++ != must_be_char, "Unexpected character");
286   return 0;
287 }
288 
skip_past(char c)289 void HashtableTextDump::skip_past(char c) {
290   for (;;) {
291     corrupted_if(remain() < 1, "Truncated");
292     if (*_p++ == c) {
293       return;
294     }
295   }
296 }
297 
check_version(const char * ver)298 void HashtableTextDump::check_version(const char* ver) {
299   int len = (int)strlen(ver);
300   corrupted_if(remain() < len, "Truncated");
301   if (strncmp(_p, ver, len) != 0) {
302     quit("wrong version of hashtable dump file", _filename);
303   }
304   _p += len;
305   skip_newline();
306 }
307 
scan_prefix_type()308 void HashtableTextDump::scan_prefix_type() {
309   _p++;
310   if (strncmp(_p, "SECTION: String", 15) == 0) {
311     _p += 15;
312     _prefix_type = StringPrefix;
313   } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
314     _p += 15;
315     _prefix_type = SymbolPrefix;
316   } else {
317     _prefix_type = Unknown;
318   }
319   skip_newline();
320 }
321 
scan_prefix(int * utf8_length)322 int HashtableTextDump::scan_prefix(int* utf8_length) {
323   if (*_p == '@') {
324     scan_prefix_type();
325   }
326 
327   switch (_prefix_type) {
328   case SymbolPrefix:
329     *utf8_length = scan_symbol_prefix(); break;
330   case StringPrefix:
331     *utf8_length = scan_string_prefix(); break;
332   default:
333     tty->print_cr("Shared input data type: Unknown.");
334     corrupted(_p, "Unknown data type");
335   }
336 
337   return _prefix_type;
338 }
339 
scan_string_prefix()340 int HashtableTextDump::scan_string_prefix() {
341   // Expect /[0-9]+: /
342   int utf8_length = 0;
343   get_num(':', &utf8_length);
344   if (*_p != ' ') {
345     corrupted(_p, "Wrong prefix format for string");
346   }
347   _p++;
348   return utf8_length;
349 }
350 
scan_symbol_prefix()351 int HashtableTextDump::scan_symbol_prefix() {
352   // Expect /[0-9]+ (-|)[0-9]+: /
353   int utf8_length = 0;
354   get_num(' ', &utf8_length);
355   if (*_p == '-') {
356     _p++;
357   }
358   int ref_num;
359   get_num(':', &ref_num);
360   if (*_p != ' ') {
361     corrupted(_p, "Wrong prefix format for symbol");
362   }
363   _p++;
364   return utf8_length;
365 }
366 
unescape(const char * from,const char * end,int count)367 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
368   jchar value = 0;
369 
370   corrupted_if(from + count > end, "Truncated");
371 
372   for (int i=0; i<count; i++) {
373     char c = *from++;
374     switch (c) {
375     case '0': case '1': case '2': case '3': case '4':
376     case '5': case '6': case '7': case '8': case '9':
377       value = (value << 4) + c - '0';
378       break;
379     case 'a': case 'b': case 'c':
380     case 'd': case 'e': case 'f':
381       value = (value << 4) + 10 + c - 'a';
382       break;
383     case 'A': case 'B': case 'C':
384     case 'D': case 'E': case 'F':
385       value = (value << 4) + 10 + c - 'A';
386       break;
387     default:
388       ShouldNotReachHere();
389     }
390   }
391   return value;
392 }
393 
get_utf8(char * utf8_buffer,int utf8_length)394 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
395   // cache in local vars
396   const char* from = _p;
397   const char* end = _end;
398   char* to = utf8_buffer;
399   int n = utf8_length;
400 
401   for (; n > 0 && from < end; n--) {
402     if (*from != '\\') {
403       *to++ = *from++;
404     } else {
405       corrupted_if(from + 2 > end, "Truncated");
406       char c = from[1];
407       from += 2;
408       switch (c) {
409       case 'x':
410         {
411           jchar value = unescape(from, end, 2);
412           from += 2;
413           assert(value <= 0xff, "sanity");
414           *to++ = (char)(value & 0xff);
415         }
416         break;
417       case 't':  *to++ = '\t'; break;
418       case 'n':  *to++ = '\n'; break;
419       case 'r':  *to++ = '\r'; break;
420       case '\\': *to++ = '\\'; break;
421       default:
422         corrupted(_p, "Unsupported character");
423       }
424     }
425   }
426   corrupted_if(n > 0, "Truncated"); // expected more chars but file has ended
427   _p = from;
428   skip_newline();
429 }
430 
431 // NOTE: the content is NOT the same as
432 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
433 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
434 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
put_utf8(outputStream * st,const char * utf8_string,int utf8_length)435 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
436   const char *c = utf8_string;
437   const char *end = c + utf8_length;
438   for (; c < end; c++) {
439     switch (*c) {
440     case '\t': st->print("\\t"); break;
441     case '\r': st->print("\\r"); break;
442     case '\n': st->print("\\n"); break;
443     case '\\': st->print("\\\\"); break;
444     default:
445       if (isprint(*c)) {
446         st->print("%c", *c);
447       } else {
448         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
449       }
450     }
451   }
452 }
453