1 /*
2  * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 
26 #include "precompiled.hpp"
27 #include "cds/metaspaceShared.hpp"
28 #include "classfile/altHashing.hpp"
29 #include "classfile/classLoaderData.hpp"
30 #include "classfile/vmSymbols.hpp"
31 #include "gc/shared/collectedHeap.hpp"
32 #include "logging/log.hpp"
33 #include "logging/logStream.hpp"
34 #include "memory/allocation.inline.hpp"
35 #include "memory/resourceArea.hpp"
36 #include "memory/universe.hpp"
37 #include "oops/symbol.hpp"
38 #include "runtime/atomic.hpp"
39 #include "runtime/mutexLocker.hpp"
40 #include "runtime/os.hpp"
41 #include "runtime/signature.hpp"
42 #include "utilities/utf8.hpp"
43 
44 Symbol* Symbol::_vm_symbols[vmSymbols::number_of_symbols()];
45 
pack_hash_and_refcount(short hash,int refcount)46 uint32_t Symbol::pack_hash_and_refcount(short hash, int refcount) {
47   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
48   assert(refcount >= 0, "negative refcount");
49   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
50   uint32_t hi = hash;
51   uint32_t lo = refcount;
52   return (hi << 16) | lo;
53 }
54 
Symbol(const u1 * name,int length,int refcount)55 Symbol::Symbol(const u1* name, int length, int refcount) {
56   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), refcount);
57   _length = length;
58   // _body[0..1] are allocated in the header just by coincidence in the current
59   // implementation of Symbol. They are read by identity_hash(), so make sure they
60   // are initialized.
61   // No other code should assume that _body[0..1] are always allocated. E.g., do
62   // not unconditionally read base()[0] as that will be invalid for an empty Symbol.
63   _body[0] = _body[1] = 0;
64   memcpy(_body, name, length);
65 }
66 
operator new(size_t sz,int len)67 void* Symbol::operator new(size_t sz, int len) throw() {
68 #if INCLUDE_CDS
69  if (DumpSharedSpaces) {
70    MutexLocker ml(DumpRegion_lock, Mutex::_no_safepoint_check_flag);
71    // To get deterministic output from -Xshare:dump, we ensure that Symbols are allocated in
72    // increasing addresses. When the symbols are copied into the archive, we preserve their
73    // relative address order (sorted, see ArchiveBuilder::gather_klasses_and_symbols).
74    //
75    // We cannot use arena because arena chunks are allocated by the OS. As a result, for example,
76    // the archived symbol of "java/lang/Object" may sometimes be lower than "java/lang/String", and
77    // sometimes be higher. This would cause non-deterministic contents in the archive.
78    DEBUG_ONLY(static void* last = 0);
79    void* p = (void*)MetaspaceShared::symbol_space_alloc(size(len)*wordSize);
80    assert(p > last, "must increase monotonically");
81    DEBUG_ONLY(last = p);
82    return p;
83  }
84 #endif
85   int alloc_size = size(len)*wordSize;
86   address res = (address) AllocateHeap(alloc_size, mtSymbol);
87   return res;
88 }
89 
operator new(size_t sz,int len,Arena * arena)90 void* Symbol::operator new(size_t sz, int len, Arena* arena) throw() {
91   int alloc_size = size(len)*wordSize;
92   address res = (address)arena->Amalloc_4(alloc_size);
93   return res;
94 }
95 
operator delete(void * p)96 void Symbol::operator delete(void *p) {
97   assert(((Symbol*)p)->refcount() == 0, "should not call this");
98   FreeHeap(p);
99 }
100 
101 #if INCLUDE_CDS
update_identity_hash()102 void Symbol::update_identity_hash() {
103   // This is called at a safepoint during dumping of a static CDS archive. The caller should have
104   // called os::init_random() with a deterministic seed and then iterate all archived Symbols in
105   // a deterministic order.
106   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
107   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), PERM_REFCOUNT);
108 }
109 
set_permanent()110 void Symbol::set_permanent() {
111   // This is called at a safepoint during dumping of a dynamic CDS archive.
112   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
113   _hash_and_refcount =  pack_hash_and_refcount(extract_hash(_hash_and_refcount), PERM_REFCOUNT);
114 }
115 #endif
116 
117 // ------------------------------------------------------------------
118 // Symbol::index_of
119 //
120 // Finds if the given string is a substring of this symbol's utf8 bytes.
121 // Return -1 on failure.  Otherwise return the first index where str occurs.
index_of_at(int i,const char * str,int len) const122 int Symbol::index_of_at(int i, const char* str, int len) const {
123   assert(i >= 0 && i <= utf8_length(), "oob");
124   if (len <= 0)  return 0;
125   char first_char = str[0];
126   address bytes = (address) ((Symbol*)this)->base();
127   address limit = bytes + utf8_length() - len;  // inclusive limit
128   address scan = bytes + i;
129   if (scan > limit)
130     return -1;
131   for (; scan <= limit; scan++) {
132     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
133     if (scan == NULL)
134       return -1;  // not found
135     assert(scan >= bytes+i && scan <= limit, "scan oob");
136     if (len <= 2
137         ? (char) scan[len-1] == str[len-1]
138         : memcmp(scan+1, str+1, len-1) == 0) {
139       return (int)(scan - bytes);
140     }
141   }
142   return -1;
143 }
144 
145 
as_C_string(char * buf,int size) const146 char* Symbol::as_C_string(char* buf, int size) const {
147   if (size > 0) {
148     int len = MIN2(size - 1, utf8_length());
149     for (int i = 0; i < len; i++) {
150       buf[i] = char_at(i);
151     }
152     buf[len] = '\0';
153   }
154   return buf;
155 }
156 
as_C_string() const157 char* Symbol::as_C_string() const {
158   int len = utf8_length();
159   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
160   return as_C_string(str, len + 1);
161 }
162 
print_utf8_on(outputStream * st) const163 void Symbol::print_utf8_on(outputStream* st) const {
164   st->print("%s", as_C_string());
165 }
166 
print_symbol_on(outputStream * st) const167 void Symbol::print_symbol_on(outputStream* st) const {
168   char *s;
169   st = st ? st : tty;
170   {
171     // ResourceMark may not affect st->print(). If st is a string
172     // stream it could resize, using the same resource arena.
173     ResourceMark rm;
174     s = as_quoted_ascii();
175     s = os::strdup(s);
176   }
177   if (s == NULL) {
178     st->print("(null)");
179   } else {
180     st->print("%s", s);
181     os::free(s);
182   }
183 }
184 
as_quoted_ascii() const185 char* Symbol::as_quoted_ascii() const {
186   const char *ptr = (const char *)&_body[0];
187   int quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
188   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
189   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
190   return result;
191 }
192 
as_unicode(int & length) const193 jchar* Symbol::as_unicode(int& length) const {
194   Symbol* this_ptr = (Symbol*)this;
195   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
196   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
197   if (length > 0) {
198     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
199   }
200   return result;
201 }
202 
as_klass_external_name(char * buf,int size) const203 const char* Symbol::as_klass_external_name(char* buf, int size) const {
204   if (size > 0) {
205     char* str    = as_C_string(buf, size);
206     int   length = (int)strlen(str);
207     // Turn all '/'s into '.'s (also for array klasses)
208     for (int index = 0; index < length; index++) {
209       if (str[index] == JVM_SIGNATURE_SLASH) {
210         str[index] = JVM_SIGNATURE_DOT;
211       }
212     }
213     return str;
214   } else {
215     return buf;
216   }
217 }
218 
as_klass_external_name() const219 const char* Symbol::as_klass_external_name() const {
220   char* str    = as_C_string();
221   int   length = (int)strlen(str);
222   // Turn all '/'s into '.'s (also for array klasses)
223   for (int index = 0; index < length; index++) {
224     if (str[index] == JVM_SIGNATURE_SLASH) {
225       str[index] = JVM_SIGNATURE_DOT;
226     }
227   }
228   return str;
229 }
230 
print_class(outputStream * os,const SignatureStream & ss)231 static void print_class(outputStream *os, const SignatureStream& ss) {
232   int sb = ss.raw_symbol_begin(), se = ss.raw_symbol_end();
233   for (int i = sb; i < se; ++i) {
234     int ch = ss.raw_char_at(i);
235     if (ch == JVM_SIGNATURE_SLASH) {
236       os->put(JVM_SIGNATURE_DOT);
237     } else {
238       os->put(ch);
239     }
240   }
241 }
242 
print_array(outputStream * os,SignatureStream & ss)243 static void print_array(outputStream *os, SignatureStream& ss) {
244   int dimensions = ss.skip_array_prefix();
245   assert(dimensions > 0, "");
246   if (ss.is_reference()) {
247     print_class(os, ss);
248   } else {
249     os->print("%s", type2name(ss.type()));
250   }
251   for (int i = 0; i < dimensions; ++i) {
252     os->print("[]");
253   }
254 }
255 
print_as_signature_external_return_type(outputStream * os)256 void Symbol::print_as_signature_external_return_type(outputStream *os) {
257   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
258     if (ss.at_return_type()) {
259       if (ss.is_array()) {
260         print_array(os, ss);
261       } else if (ss.is_reference()) {
262         print_class(os, ss);
263       } else {
264         os->print("%s", type2name(ss.type()));
265       }
266     }
267   }
268 }
269 
print_as_signature_external_parameters(outputStream * os)270 void Symbol::print_as_signature_external_parameters(outputStream *os) {
271   bool first = true;
272   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
273     if (ss.at_return_type()) break;
274     if (!first) { os->print(", "); }
275     if (ss.is_array()) {
276       print_array(os, ss);
277     } else if (ss.is_reference()) {
278       print_class(os, ss);
279     } else {
280       os->print("%s", type2name(ss.type()));
281     }
282     first = false;
283   }
284 }
285 
286 // Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
287 // a thread could be concurrently removing the Symbol.  This is used during SymbolTable
288 // lookup to avoid reviving a dead Symbol.
try_increment_refcount()289 bool Symbol::try_increment_refcount() {
290   uint32_t found = _hash_and_refcount;
291   while (true) {
292     uint32_t old_value = found;
293     int refc = extract_refcount(old_value);
294     if (refc == PERM_REFCOUNT) {
295       return true;  // sticky max or created permanent
296     } else if (refc == 0) {
297       return false; // dead, can't revive.
298     } else {
299       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value + 1);
300       if (found == old_value) {
301         return true; // successfully updated.
302       }
303       // refcount changed, try again.
304     }
305   }
306 }
307 
308 // The increment_refcount() is called when not doing lookup. It is assumed that you
309 // have a symbol with a non-zero refcount and it can't become zero while referenced by
310 // this caller.
increment_refcount()311 void Symbol::increment_refcount() {
312   if (!try_increment_refcount()) {
313 #ifdef ASSERT
314     print();
315     fatal("refcount has gone to zero");
316 #endif
317   }
318 #ifndef PRODUCT
319   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
320     NOT_PRODUCT(Atomic::inc(&_total_count);)
321   }
322 #endif
323 }
324 
325 // Decrement refcount potentially while racing increment, so we need
326 // to check the value after attempting to decrement so that if another
327 // thread increments to PERM_REFCOUNT the value is not decremented.
decrement_refcount()328 void Symbol::decrement_refcount() {
329   uint32_t found = _hash_and_refcount;
330   while (true) {
331     uint32_t old_value = found;
332     int refc = extract_refcount(old_value);
333     if (refc == PERM_REFCOUNT) {
334       return;  // refcount is permanent, permanent is sticky
335     } else if (refc == 0) {
336 #ifdef ASSERT
337       print();
338       fatal("refcount underflow");
339 #endif
340       return;
341     } else {
342       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value - 1);
343       if (found == old_value) {
344         return;  // successfully updated.
345       }
346       // refcount changed, try again.
347     }
348   }
349 }
350 
make_permanent()351 void Symbol::make_permanent() {
352   uint32_t found = _hash_and_refcount;
353   while (true) {
354     uint32_t old_value = found;
355     int refc = extract_refcount(old_value);
356     if (refc == PERM_REFCOUNT) {
357       return;  // refcount is permanent, permanent is sticky
358     } else if (refc == 0) {
359 #ifdef ASSERT
360       print();
361       fatal("refcount underflow");
362 #endif
363       return;
364     } else {
365       int hash = extract_hash(old_value);
366       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, pack_hash_and_refcount(hash, PERM_REFCOUNT));
367       if (found == old_value) {
368         return;  // successfully updated.
369       }
370       // refcount changed, try again.
371     }
372   }
373 }
374 
metaspace_pointers_do(MetaspaceClosure * it)375 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
376   if (log_is_enabled(Trace, cds)) {
377     LogStream trace_stream(Log(cds)::trace());
378     trace_stream.print("Iter(Symbol): %p ", this);
379     print_value_on(&trace_stream);
380     trace_stream.cr();
381   }
382 }
383 
print_on(outputStream * st) const384 void Symbol::print_on(outputStream* st) const {
385   st->print("Symbol: '");
386   print_symbol_on(st);
387   st->print("'");
388   st->print(" count %d", refcount());
389 }
390 
print() const391 void Symbol::print() const { print_on(tty); }
392 
393 // The print_value functions are present in all builds, to support the
394 // disassembler and error reporting.
print_value_on(outputStream * st) const395 void Symbol::print_value_on(outputStream* st) const {
396   st->print("'");
397   for (int i = 0; i < utf8_length(); i++) {
398     st->print("%c", char_at(i));
399   }
400   st->print("'");
401 }
402 
print_value() const403 void Symbol::print_value() const { print_value_on(tty); }
404 
is_valid(Symbol * s)405 bool Symbol::is_valid(Symbol* s) {
406   if (!is_aligned(s, sizeof(MetaWord))) return false;
407   if ((size_t)s < os::min_page_size()) return false;
408 
409   if (!os::is_readable_range(s, s + 1)) return false;
410 
411   // Symbols are not allocated in Java heap.
412   if (Universe::heap()->is_in(s)) return false;
413 
414   int len = s->utf8_length();
415   if (len < 0) return false;
416 
417   jbyte* bytes = (jbyte*) s->bytes();
418   return os::is_readable_range(bytes, bytes + len);
419 }
420 
421 // SymbolTable prints this in its statistics
NOT_PRODUCT(size_t Symbol::_total_count=0;)422 NOT_PRODUCT(size_t Symbol::_total_count = 0;)
423 
424 #ifndef PRODUCT
425 bool Symbol::is_valid_id(vmSymbolID vm_symbol_id) {
426   return vmSymbols::is_valid_id(vm_symbol_id);
427 }
428 #endif
429