1 /*
2  * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #ifndef SHARE_GC_SHARED_STRINGDEDUP_STRINGDEDUPTABLE_HPP
26 #define SHARE_GC_SHARED_STRINGDEDUP_STRINGDEDUPTABLE_HPP
27 
28 #include "gc/shared/stringdedup/stringDedupStat.hpp"
29 #include "runtime/mutexLocker.hpp"
30 
31 class StringDedupEntryCache;
32 class StringDedupUnlinkOrOopsDoClosure;
33 
34 //
35 // Table entry in the deduplication hashtable. Points weakly to the
36 // character array. Can be chained in a linked list in case of hash
37 // collisions or when placed in a freelist in the entry cache.
38 //
39 class StringDedupEntry : public CHeapObj<mtGC> {
40 private:
41   StringDedupEntry* _next;
42   unsigned int      _hash;
43   bool              _latin1;
44   typeArrayOop      _obj;
45 
46 public:
StringDedupEntry()47   StringDedupEntry() :
48     _next(NULL),
49     _hash(0),
50     _latin1(false),
51     _obj(NULL) {
52   }
53 
next()54   StringDedupEntry* next() {
55     return _next;
56   }
57 
next_addr()58   StringDedupEntry** next_addr() {
59     return &_next;
60   }
61 
set_next(StringDedupEntry * next)62   void set_next(StringDedupEntry* next) {
63     _next = next;
64   }
65 
hash()66   unsigned int hash() {
67     return _hash;
68   }
69 
set_hash(unsigned int hash)70   void set_hash(unsigned int hash) {
71     _hash = hash;
72   }
73 
latin1()74   bool latin1() {
75     return _latin1;
76   }
77 
set_latin1(bool latin1)78   void set_latin1(bool latin1) {
79     _latin1 = latin1;
80   }
81 
obj()82   typeArrayOop obj() {
83     return _obj;
84   }
85 
obj_addr()86   typeArrayOop* obj_addr() {
87     return &_obj;
88   }
89 
set_obj(typeArrayOop obj)90   void set_obj(typeArrayOop obj) {
91     _obj = obj;
92   }
93 };
94 
95 //
// The deduplication hashtable keeps track of all unique character arrays used
// by String objects. Each table entry weakly points to a character array, allowing
// otherwise unreachable character arrays to be declared dead and pruned from the
// table.
100 //
101 // The table is dynamically resized to accommodate the current number of table entries.
102 // The table has hash buckets with chains for hash collision. If the average chain
103 // length goes above or below given thresholds the table grows or shrinks accordingly.
104 //
105 // The table is also dynamically rehashed (using a new hash seed) if it becomes severely
106 // unbalanced, i.e., a hash chain is significantly longer than average.
107 //
// All access to the table is protected by the StringDedupTable_lock, except under
// safepoints, in which case GC workers are allowed to access the table partitions they
// have claimed without first acquiring the lock. Note, however, that this applies only
// to the table partition (i.e. a range of elements in _buckets), not other parts of the
// table such as the _entries field, statistics counters, etc.
113 //
114 class StringDedupTable : public CHeapObj<mtGC> {
115 private:
116   // The currently active hashtable instance. Only modified when
117   // the table is resizes or rehashed.
118   static StringDedupTable*        _table;
119 
120   // Cache for reuse and fast alloc/free of table entries.
121   static StringDedupEntryCache*   _entry_cache;
122 
123   StringDedupEntry**              _buckets;
124   size_t                          _size;
125   uintx                           _entries;
126   uintx                           _shrink_threshold;
127   uintx                           _grow_threshold;
128   bool                            _rehash_needed;
129 
130   // The hash seed also dictates which hash function to use. A
131   // zero hash seed means we will use the Java compatible hash
132   // function (which doesn't use a seed), and a non-zero hash
133   // seed means we use the murmur3 hash function.
134   uint64_t                        _hash_seed;
135 
136   // Constants governing table resize/rehash/cache.
137   static const size_t             _min_size;
138   static const size_t             _max_size;
139   static const double             _grow_load_factor;
140   static const double             _shrink_load_factor;
141   static const uintx              _rehash_multiple;
142   static const uintx              _rehash_threshold;
143   static const double             _max_cache_factor;
144 
145   // Table statistics, only used for logging.
146   static uintx                    _entries_added;
147   static uintx                    _entries_removed;
148   static uintx                    _resize_count;
149   static uintx                    _rehash_count;
150 
151   static volatile size_t          _claimed_index;
152 
153   static StringDedupTable*        _resized_table;
154   static StringDedupTable*        _rehashed_table;
155 
156   StringDedupTable(size_t size, uint64_t hash_seed = 0);
157   ~StringDedupTable();
158 
159   // Returns the hash bucket at the given index.
bucket(size_t index)160   StringDedupEntry** bucket(size_t index) {
161     return _buckets + index;
162   }
163 
164   // Returns the hash bucket index for the given hash code.
hash_to_index(unsigned int hash)165   size_t hash_to_index(unsigned int hash) {
166     return (size_t)hash & (_size - 1);
167   }
168 
169   // Adds a new table entry to the given hash bucket.
170   void add(typeArrayOop value, bool latin1, unsigned int hash, StringDedupEntry** list);
171 
172   // Removes the given table entry from the table.
173   void remove(StringDedupEntry** pentry, uint worker_id);
174 
175   // Transfers a table entry from the current table to the destination table.
176   void transfer(StringDedupEntry** pentry, StringDedupTable* dest);
177 
178   // Returns an existing character array in the given hash bucket, or NULL
179   // if no matching character array exists.
180   typeArrayOop lookup(typeArrayOop value, bool latin1, unsigned int hash,
181                       StringDedupEntry** list, uintx &count);
182 
183   // Returns an existing character array in the table, or inserts a new
184   // table entry if no matching character array exists.
185   typeArrayOop lookup_or_add_inner(typeArrayOop value, bool latin1, unsigned int hash);
186 
187   // Thread safe lookup or add of table entry
lookup_or_add(typeArrayOop value,bool latin1,unsigned int hash)188   static typeArrayOop lookup_or_add(typeArrayOop value, bool latin1, unsigned int hash) {
189     // Protect the table from concurrent access. Also note that this lock
190     // acts as a fence for _table, which could have been replaced by a new
191     // instance if the table was resized or rehashed.
192     MutexLocker ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
193     return _table->lookup_or_add_inner(value, latin1, hash);
194   }
195 
196   // Returns true if the hashtable is currently using a Java compatible
197   // hash function.
use_java_hash()198   static bool use_java_hash() {
199     return _table->_hash_seed == 0;
200   }
201 
202   // Computes the hash code for the given character array, using the
203   // currently active hash function and hash seed.
204   static unsigned int hash_code(typeArrayOop value, bool latin1);
205 
206   static uintx unlink_or_oops_do(StringDedupUnlinkOrOopsDoClosure* cl,
207                                  size_t partition_begin,
208                                  size_t partition_end,
209                                  uint worker_id);
210 
211   static size_t claim_table_partition(size_t partition_size);
212 
213   static bool is_resizing();
214   static bool is_rehashing();
215 
216   // If a table resize is needed, returns a newly allocated empty
217   // hashtable of the proper size.
218   static StringDedupTable* prepare_resize();
219 
220   // Installs a newly resized table as the currently active table
221   // and deletes the previously active table.
222   static void finish_resize(StringDedupTable* resized_table);
223 
224   // If a table rehash is needed, returns a newly allocated empty
225   // hashtable and updates the hash seed.
226   static StringDedupTable* prepare_rehash();
227 
228   // Transfers rehashed entries from the currently active table into
229   // the new table. Installs the new table as the currently active table
230   // and deletes the previously active table.
231   static void finish_rehash(StringDedupTable* rehashed_table);
232 
233 public:
234   static void create();
235 
236   // Deduplicates the given String object, or adds its backing
237   // character array to the deduplication hashtable.
238   static void deduplicate(oop java_string, StringDedupStat* stat);
239 
240   static void unlink_or_oops_do(StringDedupUnlinkOrOopsDoClosure* cl, uint worker_id);
241 
242   static void print_statistics();
243   static void verify();
244 
245   // If the table entry cache has grown too large, delete overflowed entries.
246   static void clean_entry_cache();
247 
248   // GC support
249   static void gc_prologue(bool resize_and_rehash_table);
250   static void gc_epilogue();
251 };
252 
253 #endif // SHARE_GC_SHARED_STRINGDEDUP_STRINGDEDUPTABLE_HPP
254