1 /*
2  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
26 #define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
27 
28 #include "oops/array.hpp"
29 #include "oops/symbol.hpp"
30 #include "utilities/hashtable.hpp"
31 
32 template <class T, class N> class CompactHashtable;
33 class NumberSeq;
34 class SimpleCompactHashtable;
35 class SerializeClosure;
36 
37 // Stats for symbol tables in the CDS archive
38 class CompactHashtableStats {
39 public:
40   int hashentry_count;
41   int hashentry_bytes;
42   int bucket_count;
43   int bucket_bytes;
44 };
45 
46 /////////////////////////////////////////////////////////////////////////
47 //
48 // The compact hash table writer. Used at dump time for writing out
49 // the compact table to the shared archive.
50 //
51 // At dump time, the CompactHashtableWriter obtains all entries from the
52 // symbol/string table and adds them to a new temporary hash table. The hash
53 // table size (number of buckets) is calculated using
54 // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
55 // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
56 // 4 is chosen because it produces smaller sized bucket on average for
57 // faster lookup. It also has relatively small number of empty buckets and
58 // good distribution of the entries.
59 //
60 // We use a simple hash function (hash % num_bucket) for the table.
61 // The new table is compacted when written out. Please see comments
62 // above the CompactHashtable class for the table layout detail. The bucket
63 // offsets are written to the archive as part of the compact table. The
64 // bucket offset is encoded in the low 30-bit (0-29) and the bucket type
65 // (regular or compact) are encoded in bit[31, 30]. For buckets with more
66 // than one entry, both hash and entry offset are written to the
67 // table. For buckets with only one entry, only the entry offset is written
68 // to the table and the buckets are tagged as compact in their type bits.
69 // Buckets without entry are skipped from the table. Their offsets are
70 // still written out for faster lookup.
71 //
72 class CompactHashtableWriter: public StackObj {
73 public:
74   class Entry {
75     unsigned int _hash;
76     u4 _value;
77 
78   public:
Entry()79     Entry() {}
Entry(unsigned int hash,u4 val)80     Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {}
81 
value()82     u4 value() {
83       return _value;
84     }
hash()85     unsigned int hash() {
86       return _hash;
87     }
88 
operator ==(const CompactHashtableWriter::Entry & other)89     bool operator==(const CompactHashtableWriter::Entry& other) {
90       return (_value == other._value && _hash == other._hash);
91     }
92   }; // class CompactHashtableWriter::Entry
93 
94 private:
95   int _num_entries;
96   int _num_buckets;
97   int _num_empty_buckets;
98   int _num_value_only_buckets;
99   int _num_other_buckets;
100   GrowableArray<Entry>** _buckets;
101   CompactHashtableStats* _stats;
102   Array<u4>* _compact_buckets;
103   Array<u4>* _compact_entries;
104 
105 public:
106   // This is called at dump-time only
107   CompactHashtableWriter(int num_buckets, CompactHashtableStats* stats);
108   ~CompactHashtableWriter();
109 
110   void add(unsigned int hash, u4 value);
add(u4 value)111   void add(u4 value) {
112     add((unsigned int)value, value);
113   }
114 
115 private:
116   void allocate_table();
117   void dump_table(NumberSeq* summary);
118 
119 public:
120   void dump(SimpleCompactHashtable *cht, const char* table_name);
121   const char* table_name();
122 };
123 
124 class CompactSymbolTableWriter: public CompactHashtableWriter {
125 public:
CompactSymbolTableWriter(int num_buckets,CompactHashtableStats * stats)126   CompactSymbolTableWriter(int num_buckets, CompactHashtableStats* stats) :
127     CompactHashtableWriter(num_buckets, stats) {}
128   void add(unsigned int hash, Symbol *symbol);
129   void dump(CompactHashtable<Symbol*, char> *cht);
130 };
131 
132 class CompactStringTableWriter: public CompactHashtableWriter {
133 public:
CompactStringTableWriter(int num_entries,CompactHashtableStats * stats)134   CompactStringTableWriter(int num_entries, CompactHashtableStats* stats) :
135     CompactHashtableWriter(num_entries, stats) {}
136   void add(unsigned int hash, oop string);
137   void dump(CompactHashtable<oop, char> *cht);
138 };
139 
140 #define REGULAR_BUCKET_TYPE       0
141 #define VALUE_ONLY_BUCKET_TYPE    1
142 #define TABLEEND_BUCKET_TYPE      3
143 #define BUCKET_OFFSET_MASK        0x3FFFFFFF
144 #define BUCKET_OFFSET(info)       ((info) & BUCKET_OFFSET_MASK)
145 #define BUCKET_TYPE_SHIFT         30
146 #define BUCKET_TYPE(info)         (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
147 #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))
148 
149 /////////////////////////////////////////////////////////////////////////////
150 //
151 // CompactHashtable is used to stored the CDS archive's symbol/string table. Used
152 // at runtime only to access the compact table from the archive.
153 //
154 // Because these tables are read-only (no entries can be added/deleted) at run-time
155 // and tend to have large number of entries, we try to minimize the footprint
156 // cost per entry.
157 //
158 // The CompactHashtable is split into two arrays
159 //
160 //   u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
161 //   u4 entries[<variable size>]
162 //
163 // The size of buckets[] is 'num_buckets + 1'. Each entry of
164 // buckets[] is a 32-bit encoding of the bucket type and bucket offset,
165 // with the type in the left-most 2-bit and offset in the remaining 30-bit.
166 // The last entry is a special type. It contains the end of the last
167 // bucket.
168 //
169 // There are two types of buckets, regular buckets and value_only buckets. The
170 // value_only buckets have '01' in their highest 2-bit, and regular buckets have
171 // '00' in their highest 2-bit.
172 //
173 // For normal buckets, each entry is 8 bytes in the entries[]:
174 //   u4 hash;    /* symbol/string hash */
175 //   union {
176 //     u4 offset;  /* Symbol* sym = (Symbol*)(base_address + offset) */
177 //     narrowOop str; /* String narrowOop encoding */
178 //   }
179 //
180 //
181 // For value_only buckets, each entry has only the 4-byte 'offset' in the entries[].
182 //
183 // Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code
184 //            is skipped.
185 // buckets[0, 4, 5, ....]
186 //         |  |  |
187 //         |  |  +---+
188 //         |  |      |
189 //         |  +----+ |
190 //         v       v v
191 // entries[H,O,H,O,O,H,O,H,O.....]
192 //
193 // See CompactHashtable::lookup() for how the table is searched at runtime.
194 // See CompactHashtableWriter::dump() for how the table is written at CDS
195 // dump time.
196 //
197 class SimpleCompactHashtable {
198 protected:
199   address  _base_address;
200   u4  _bucket_count;
201   u4  _entry_count;
202   u4* _buckets;
203   u4* _entries;
204 
205 public:
SimpleCompactHashtable()206   SimpleCompactHashtable() {
207     _entry_count = 0;
208     _bucket_count = 0;
209     _buckets = 0;
210     _entries = 0;
211   }
212 
reset()213   void reset() {
214     _bucket_count = 0;
215     _entry_count = 0;
216     _buckets = 0;
217     _entries = 0;
218   }
219 
init(address base_address,u4 entry_count,u4 bucket_count,u4 * buckets,u4 * entries)220   void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) {
221     _base_address = base_address;
222     _bucket_count = bucket_count;
223     _entry_count = entry_count;
224     _buckets = buckets;
225     _entries = entries;
226   }
227 
228   template <class I> inline void iterate(const I& iterator);
229 
230   bool exists(u4 value);
231 
232   // For reading from/writing to the CDS archive
233   void serialize(SerializeClosure* soc);
234 };
235 
236 template <class T, class N> class CompactHashtable : public SimpleCompactHashtable {
237   friend class VMStructs;
238 
239 public:
240   enum CompactHashtableType {
241     _symbol_table = 0,
242     _string_table = 1
243   };
244 
245 private:
246   u4 _type;
247 
248   inline Symbol* decode_entry(CompactHashtable<Symbol*, char>* const t,
249                               u4 offset, const char* name, int len);
250 
251   inline oop decode_entry(CompactHashtable<oop, char>* const t,
252                           u4 offset, const char* name, int len);
253 public:
CompactHashtable()254   CompactHashtable() : SimpleCompactHashtable() {}
255 
set_type(CompactHashtableType type)256   void set_type(CompactHashtableType type) {
257     _type = (u4)type;
258   }
259 
260   // Lookup an entry from the compact table
261   inline T lookup(const N* name, unsigned int hash, int len);
262 
263   // iterate over symbols
264   void symbols_do(SymbolClosure *cl);
265 
266   // iterate over strings
267   void oops_do(OopClosure* f);
268 
269   // For reading from/writing to the CDS archive
270   void serialize(SerializeClosure* soc);
271 
base_address()272   uintx base_address() {
273     return (uintx) _base_address;
274   }
275 };
276 
277 ////////////////////////////////////////////////////////////////////////
278 //
279 // Read/Write the contents of a hashtable textual dump (created by
280 // SymbolTable::dump and StringTable::dump).
281 // Because the dump file may be big (hundred of MB in extreme cases),
282 // we use mmap for fast access when reading it.
283 //
284 class HashtableTextDump {
285   int _fd;
286   const char* _base;
287   const char* _p;
288   const char* _end;
289   const char* _filename;
290   size_t      _size;
291   int         _prefix_type;
292   int         _line_no;
293 public:
294   HashtableTextDump(const char* filename);
295   ~HashtableTextDump();
296 
297   enum {
298     SymbolPrefix = 1 << 0,
299     StringPrefix = 1 << 1,
300     Unknown = 1 << 2
301   };
302 
303   void quit(const char* err, const char* msg);
304 
remain()305   inline int remain() {
306     return (int)(_end - _p);
307   }
308 
309   void corrupted(const char *p, const char *msg);
310 
corrupted_if(bool cond,const char * msg)311   inline void corrupted_if(bool cond, const char *msg) {
312     if (cond) {
313       corrupted(_p, msg);
314     }
315   }
316 
317   bool skip_newline();
318   int skip(char must_be_char);
319   void skip_past(char c);
320   void check_version(const char* ver);
321 
get_num(char delim,int * num)322   inline void get_num(char delim, int *num) {
323     const char* p   = _p;
324     const char* end = _end;
325     u8 n = 0;
326 
327     while (p < end) {
328       char c = *p++;
329       if ('0' <= c && c <= '9') {
330         n = n * 10 + (c - '0');
331         if (n > (u8)INT_MAX) {
332           corrupted(_p, "Num overflow");
333         }
334       } else if (c == delim) {
335         _p = p;
336         *num = (int)n;
337         return;
338       } else {
339         // Not [0-9], not 'delim'
340         corrupted(_p, "Unrecognized format");;
341       }
342     }
343 
344     corrupted(_end, "Incorrect format");
345     ShouldNotReachHere();
346   }
347 
348   void scan_prefix_type();
349   int scan_prefix(int* utf8_length);
350   int scan_string_prefix();
351   int scan_symbol_prefix();
352 
353   jchar unescape(const char* from, const char* end, int count);
354   void get_utf8(char* utf8_buffer, int utf8_length);
355   static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
356 };
357 
358 #endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
359