1 // $Id: lookup.cpp,v 1.53 2004/02/26 13:43:18 ericb Exp $
2 //
3 // This software is subject to the terms of the IBM Jikes Compiler
4 // License Agreement available at the following URL:
5 // http://ibm.com/developerworks/opensource/jikes.
6 // Copyright (C) 1996, 2004 IBM Corporation and others.  All Rights Reserved.
7 // You must accept the terms of that agreement to use this software.
8 //
9 
10 #include "lookup.h"
11 #include "symbol.h"
12 #include "code.h"
13 #include "ast.h"
14 #include "case.h"
15 #include <cwchar>
16 
17 #ifdef HAVE_JIKES_NAMESPACE
18 namespace Jikes { // Open namespace Jikes block
19 #endif
20 
PackageCast()21 PackageSymbol* Symbol::PackageCast()
22 {
23     return DYNAMIC_CAST<PackageSymbol*> (_kind == PACKAGE ? this : NULL);
24 }
25 
PackageCast() const26 const PackageSymbol* Symbol::PackageCast() const
27 {
28     return DYNAMIC_CAST<const PackageSymbol*> (_kind == PACKAGE ? this : NULL);
29 }
30 
TypeCast()31 TypeSymbol* Symbol::TypeCast()
32 {
33     return DYNAMIC_CAST<TypeSymbol*> (_kind == TYPE ? this : NULL);
34 }
35 
TypeCast() const36 const TypeSymbol* Symbol::TypeCast() const
37 {
38     return DYNAMIC_CAST<const TypeSymbol*> (_kind == TYPE ? this : NULL);
39 }
40 
MethodCast()41 MethodSymbol* Symbol::MethodCast()
42 {
43     return DYNAMIC_CAST<MethodSymbol*> (_kind == METHOD ? this : NULL);
44 }
45 
MethodCast() const46 const MethodSymbol* Symbol::MethodCast() const
47 {
48     return DYNAMIC_CAST<const MethodSymbol*> (_kind == METHOD ? this : NULL);
49 }
50 
BlockCast()51 BlockSymbol* Symbol::BlockCast()
52 {
53     return DYNAMIC_CAST<BlockSymbol*> (_kind == BLOCK ? this : NULL);
54 }
55 
BlockCast() const56 const BlockSymbol* Symbol::BlockCast() const
57 {
58     return DYNAMIC_CAST<const BlockSymbol*> (_kind == BLOCK ? this : NULL);
59 }
60 
VariableCast()61 VariableSymbol* Symbol::VariableCast()
62 {
63     return DYNAMIC_CAST<VariableSymbol*> (_kind == VARIABLE ? this : NULL);
64 }
65 
VariableCast() const66 const VariableSymbol* Symbol::VariableCast() const
67 {
68     return DYNAMIC_CAST<const VariableSymbol*>
69         (_kind == VARIABLE ? this : NULL);
70 }
71 
LabelCast()72 LabelSymbol* Symbol::LabelCast()
73 {
74     return DYNAMIC_CAST<LabelSymbol*> (_kind == LABEL ? this : NULL);
75 }
76 
LabelCast() const77 const LabelSymbol* Symbol::LabelCast() const
78 {
79     return DYNAMIC_CAST<const LabelSymbol*> (_kind == LABEL ? this : NULL);
80 }
81 
LiteralCast()82 LiteralSymbol* Symbol::LiteralCast()
83 {
84     return DYNAMIC_CAST<LiteralSymbol*> (_kind == LITERAL ? this : NULL);
85 }
86 
LiteralCast() const87 const LiteralSymbol* Symbol::LiteralCast() const
88 {
89     return DYNAMIC_CAST<const LiteralSymbol*> (_kind == LITERAL ? this : NULL);
90 }
91 
NameCast()92 NameSymbol* Symbol::NameCast()
93 {
94     return DYNAMIC_CAST<NameSymbol*> (_kind == NAME ? this : NULL);
95 }
96 
NameCast() const97 const NameSymbol* Symbol::NameCast() const
98 {
99     return DYNAMIC_CAST<const NameSymbol*> (_kind == NAME ? this : NULL);
100 }
101 
PathCast()102 PathSymbol* Symbol::PathCast()
103 {
104     return DYNAMIC_CAST<PathSymbol*> (_kind == PATH ? this : NULL);
105 }
106 
PathCast() const107 const PathSymbol* Symbol::PathCast() const
108 {
109     return DYNAMIC_CAST<const PathSymbol*> (_kind == PATH ? this : NULL);
110 }
111 
DirectoryCast()112 DirectorySymbol* Symbol::DirectoryCast()
113 {
114     return DYNAMIC_CAST<DirectorySymbol*> (_kind == _DIRECTORY ? this : NULL);
115 }
116 
DirectoryCast() const117 const DirectorySymbol* Symbol::DirectoryCast() const
118 {
119     return DYNAMIC_CAST<const DirectorySymbol*>
120         (_kind == _DIRECTORY ? this : NULL);
121 }
122 
FileCast()123 FileSymbol* Symbol::FileCast()
124 {
125     return DYNAMIC_CAST<FileSymbol*> (_kind == _FILE ? this : NULL);
126 }
127 
FileCast() const128 const FileSymbol* Symbol::FileCast() const
129 {
130     return DYNAMIC_CAST<const FileSymbol*> (_kind == _FILE ? this : NULL);
131 }
132 
133 unsigned SystemTable::primes[] = {DEFAULT_HASH_SIZE, 101, 401, MAX_HASH_SIZE};
134 
SystemTable(unsigned hash_size_)135 SystemTable::SystemTable(unsigned hash_size_)
136     : directories(1024)
137 {
138     hash_size = (hash_size_ <= 0 ? 1 : hash_size_);
139 
140     prime_index = -1;
141     do
142     {
143         if (hash_size < primes[prime_index + 1])
144             break;
145         prime_index++;
146     } while (primes[prime_index] < MAX_HASH_SIZE);
147 
148     base = (Element**) memset(new Element*[hash_size], 0,
149                               hash_size * sizeof(Element*));
150 }
151 
~SystemTable()152 SystemTable::~SystemTable()
153 {
154     for (unsigned i = 0; i < directories.Length(); i++)
155         delete directories[i];
156 
157     delete [] base;
158 }
159 
Rehash()160 void SystemTable::Rehash()
161 {
162     hash_size = primes[++prime_index];
163 
164     delete [] base;
165     base = (Element**) memset(new Element*[hash_size], 0,
166                               hash_size * sizeof(Element*));
167 
168     for (unsigned k = 0; k < directories.Length(); k++)
169     {
170         Element* element = directories[k];
171 
172         int i = hash(element -> device, element -> inode);
173         element -> next = base[i];
174         base[i] = element;
175     }
176 }
177 
FindDirectorySymbol(dev_t device,ino_t inode)178 DirectorySymbol* SystemTable::FindDirectorySymbol(dev_t device, ino_t inode)
179 {
180     int k = hash(device, inode);
181 
182     for (Element* element = base[k]; element; element = element -> next)
183     {
184         if (element -> device == device && element -> inode == inode)
185             return element -> directory_symbol;
186     }
187     return NULL;
188 }
189 
InsertDirectorySymbol(dev_t device,ino_t inode,DirectorySymbol * directory_symbol)190 void SystemTable::InsertDirectorySymbol(dev_t device, ino_t inode,
191                                         DirectorySymbol* directory_symbol)
192 {
193     int k = hash(device, inode);
194 
195     Element* element = new Element(device, inode, directory_symbol);
196     directories.Next() = element;
197 
198     element -> next = base[k];
199     base[k] = element;
200 
201     //
202     // If the set is "adjustable" and the number of unique elements in it
203     // exceeds 2 times the size of the base, and we have not yet reached the
204     // maximum allowable size for a base, reallocate a larger base and rehash
205     // the elements.
206     //
207     if (directories.Length() > (hash_size << 1) &&
208         hash_size < MAX_HASH_SIZE)
209     {
210         Rehash();
211     }
212 }
213 
214 unsigned DirectoryTable::primes[] = {
215     DEFAULT_HASH_SIZE, 2039, 4093, MAX_HASH_SIZE
216 };
217 
DirectoryTable(int estimate)218 DirectoryTable::DirectoryTable(int estimate)
219     : entry_pool(estimate),
220       hash_size(primes[0]),
221       prime_index(0)
222 {
223     base = (DirectoryEntry**) memset(new DirectoryEntry*[hash_size], 0,
224                                      hash_size * sizeof(DirectoryEntry*));
225 }
226 
~DirectoryTable()227 DirectoryTable::~DirectoryTable()
228 {
229     for (unsigned i = 0; i < entry_pool.Length(); i++)
230         delete entry_pool[i];
231     delete [] base;
232 }
233 
234 
FindEntry(char * str,int len)235 DirectoryEntry* DirectoryTable::FindEntry(char* str, int len)
236 {
237     int k = Hash(str, len) % hash_size;
238     DirectoryEntry* entry;
239     for (entry = base[k]; entry; entry = entry -> next)
240     {
241         if (len == entry -> length &&
242             memcmp(entry -> name, str, len * sizeof(char)) == 0)
243         {
244             return entry -> IsDummy() ? (DirectoryEntry*) NULL : entry;
245         }
246     }
247     return NULL;
248 }
249 
250 
Rehash()251 void DirectoryTable::Rehash()
252 {
253     hash_size = primes[++prime_index];
254 
255     delete [] base;
256     base = (DirectoryEntry**) memset(new DirectoryEntry*[hash_size], 0,
257                                      hash_size * sizeof(DirectoryEntry*));
258 
259     for (unsigned i = 0; i < entry_pool.Length(); i++)
260     {
261         DirectoryEntry* e = entry_pool[i];
262         int k = Hash(e -> name, e -> length) % hash_size;
263         e -> next = base[k];
264         base[k] = e;
265     }
266 }
267 
268 
InsertEntry(DirectorySymbol * directory_symbol,char * str,int len)269 DirectoryEntry* DirectoryTable::InsertEntry(DirectorySymbol* directory_symbol,
270                                             char* str, int len)
271 {
272     int k = Hash(str, len) % hash_size;
273     DirectoryEntry* entry;
274     for (entry = base[k]; entry; entry = entry -> next)
275     {
276         if (len == entry -> length &&
277             memcmp(entry -> name, str, len * sizeof(char)) == 0)
278         {
279             return entry;
280         }
281     }
282 
283     entry = new DirectoryEntry();
284     entry_pool.Next() = entry;
285     entry -> Initialize(directory_symbol, str, len);
286 
287     entry -> next = base[k];
288     base[k] = entry;
289 
290     //
291     // If the number of unique elements in the hash table exceeds 2 times
292     // the size of the base, and we have not yet reached the maximum
293     // allowable size for a base, reallocate a larger base and rehash
294     // the elements.
295     //
296     if (entry_pool.Length() > (hash_size << 1) && hash_size < MAX_HASH_SIZE)
297         Rehash();
298     return entry;
299 }
300 
301 
302 #ifdef WIN32_FILE_SYSTEM
FindCaseInsensitiveEntry(char * name,int length)303 DirectoryEntry* DirectoryTable::FindCaseInsensitiveEntry(char* name,
304                                                          int length)
305 {
306     char* lower_name = new char[length + 1];
307     for (int i = 0; i < length; i++)
308         lower_name[i] = Case::ToAsciiLower(name[i]);
309     lower_name[length] = U_NULL;
310 
311     DirectoryEntry* entry = FindEntry(lower_name, length);
312     delete [] lower_name;
313     return entry ? entry -> Image() : entry;
314 }
315 
InsertCaseInsensitiveEntry(DirectoryEntry * image)316 void DirectoryTable::InsertCaseInsensitiveEntry(DirectoryEntry* image)
317 {
318     int length = image -> length;
319     char* lower_name = new char[length + 1];
320     for (int i = 0; i < length; i++)
321         lower_name[i] = Case::ToAsciiLower(image -> name[i]);
322     lower_name[length] = U_NULL;
323 
324     int k = Hash(lower_name, length) % hash_size;
325     DirectoryEntry* entry;
326     for (entry = base[k]; entry; entry = entry -> next)
327     {
328         if (length == entry -> length &&
329             memcmp(entry -> name, lower_name, length * sizeof(char)) == 0)
330         {
331             break;
332         }
333     }
334 
335     if (! entry)
336     {
337         FoldedDirectoryEntry* folded_entry = new FoldedDirectoryEntry(image);
338         entry_pool.Next() = folded_entry;
339         folded_entry -> Initialize(image, lower_name, length);
340 
341         folded_entry -> next = base[k];
342         base[k] = folded_entry;
343 
344         //
345         // If the number of unique elements in the hash table exceeds 2 times
346         // the size of the base, and we have not yet reached the maximum
347         // allowable size for a base, reallocate a larger base and rehash
348         // the elements.
349         //
350         if (entry_pool.Length() > (hash_size << 1) &&
351             hash_size < MAX_HASH_SIZE)
352         {
353             Rehash();
354         }
355     }
356 
357     delete [] lower_name;
358 }
359 #endif // WIN32_FILE_SYSTEM
360 
361 
Mtime()362 time_t DirectoryEntry::Mtime()
363 {
364     if (mtime_ == 0)
365     {
366         char* dirname = this -> directory -> DirectoryName();
367         int length = this -> directory -> DirectoryNameLength() +
368             this -> length + 1; // +1 for '/'
369         char* file_name = new char[length + 1];
370         strcpy(file_name, dirname);
371         if (dirname[this -> directory -> DirectoryNameLength() - 1] != U_SLASH)
372             strcat(file_name, StringConstant::U8S_SL);
373         strcat(file_name, this -> name);
374 
375         struct stat status;
376         if (JikesAPI::getInstance() -> stat(file_name, &status) == 0)
377              mtime_ = status.st_mtime;
378         else assert(false && "Cannot compute system time stamp\n");
379 
380         delete [] file_name;
381     }
382     return mtime_;
383 }
384 
385 
386 unsigned NameLookupTable::primes[] = {
387     DEFAULT_HASH_SIZE, 8191, 16411, MAX_HASH_SIZE
388 };
389 
NameLookupTable(int estimate)390 NameLookupTable::NameLookupTable(int estimate)
391     : symbol_pool(estimate),
392       hash_size(primes[0]),
393       prime_index(0)
394 {
395     base = (NameSymbol**) memset(new NameSymbol*[hash_size], 0,
396                                  hash_size * sizeof(NameSymbol*));
397 }
398 
~NameLookupTable()399 NameLookupTable::~NameLookupTable()
400 {
401     for (unsigned i = 0; i < symbol_pool.Length(); i++)
402         delete symbol_pool[i];
403     delete [] base;
404 }
405 
406 
Rehash()407 void NameLookupTable::Rehash()
408 {
409     hash_size = primes[++prime_index];
410 
411     delete [] base;
412     base = (NameSymbol**) memset(new NameSymbol*[hash_size], 0,
413                                  hash_size * sizeof(NameSymbol*));
414 
415     for (unsigned i = 0; i < symbol_pool.Length(); i++)
416     {
417         NameSymbol* ns = symbol_pool[i];
418         int k = ns -> hash_address % hash_size;
419         ns -> next = base[k];
420         base[k] = ns;
421     }
422 }
423 
424 
FindOrInsertName(const wchar_t * str,unsigned len)425 NameSymbol* NameLookupTable::FindOrInsertName(const wchar_t* str, unsigned len)
426 {
427     unsigned hash_address = Hash(str, len);
428     int k = hash_address % hash_size;
429     NameSymbol* symbol;
430     for (symbol = base[k]; symbol; symbol = (NameSymbol*) symbol -> next)
431     {
432         if (hash_address == symbol -> hash_address &&
433             len == symbol -> NameLength() &&
434             memcmp(symbol -> Name(), str, len * sizeof(wchar_t)) == 0)
435         {
436             return symbol;
437         }
438     }
439 
440     int index = symbol_pool.Length(); // index of the next element
441     symbol = new NameSymbol();
442     symbol_pool.Next() = symbol;
443     symbol -> Initialize(str, len, hash_address, index);
444 
445     symbol -> next = base[k];
446     base[k] = symbol;
447 
448     //
449     // If the number of unique elements in the hash table exceeds 2 times
450     // the size of the base, and we have not yet reached the maximum
451     // allowable size for a base, reallocate a larger base and rehash
452     // the elements.
453     //
454     if (symbol_pool.Length() > (hash_size << 1) && hash_size < MAX_HASH_SIZE)
455         Rehash();
456     return symbol;
457 }
458 
459 
460 unsigned TypeLookupTable::primes[] = {
461     DEFAULT_HASH_SIZE, 8191, 16411, MAX_HASH_SIZE
462 };
463 
TypeLookupTable(int estimate)464 TypeLookupTable::TypeLookupTable(int estimate)
465     : symbol_pool(estimate),
466       hash_size(primes[0]),
467       prime_index(0)
468 {
469     base = (TypeSymbol**) memset(new TypeSymbol*[hash_size], 0,
470                                  hash_size * sizeof(TypeSymbol*));
471 }
472 
473 
~TypeLookupTable()474 TypeLookupTable::~TypeLookupTable()
475 {
476     delete [] base;
477 }
478 
479 
Rehash()480 void TypeLookupTable::Rehash()
481 {
482     hash_size = primes[++prime_index];
483 
484     delete [] base;
485     base = (TypeSymbol**) memset(new TypeSymbol*[hash_size], 0,
486                                  hash_size * sizeof(TypeSymbol*));
487 
488     for (unsigned i = 0; i < symbol_pool.Length(); i++)
489     {
490         TypeSymbol* type = symbol_pool[i];
491         int k = type -> hash_address % hash_size;
492         type -> next_type = base[k];
493         base[k] = type;
494     }
495 }
496 
497 
FindType(const char * str,int len)498 TypeSymbol* TypeLookupTable::FindType(const char* str, int len)
499 {
500     unsigned hash_address = Hash(str, len);
501     int k = hash_address % hash_size;
502 
503     for (TypeSymbol* type = base[k]; type; type = type -> next_type)
504     {
505         assert(type -> fully_qualified_name);
506 
507         Utf8LiteralValue* fully_qualified_name = type -> fully_qualified_name;
508         if (len == fully_qualified_name -> length &&
509             memcmp(fully_qualified_name -> value, str,
510                    len * sizeof(char)) == 0)
511         {
512             return type;
513         }
514     }
515     return NULL;
516 }
517 
518 
InsertType(TypeSymbol * type)519 void TypeLookupTable::InsertType(TypeSymbol* type)
520 {
521     assert(type && type -> fully_qualified_name);
522 
523     unsigned hash_address = Hash(type -> fully_qualified_name -> value,
524                                  type -> fully_qualified_name -> length);
525     int k = hash_address % hash_size;
526 
527 #ifdef JIKES_DEBUG
528     for (TypeSymbol* t = base[k]; t; t = t -> next_type)
529         assert (type != t && "Type was already entered in type table");
530 #endif
531 
532     symbol_pool.Next() = type;
533     type -> hash_address = hash_address;
534 
535     type -> next_type = base[k];
536     base[k] = type;
537 
538     //
539     // If the number of unique elements in the hash table exceeds 2 times
540     // the size of the base, and we have not yet reached the maximum
541     // allowable size for a base, reallocate a larger base and rehash
542     // the elements.
543     //
544     if (symbol_pool.Length() > (hash_size << 1) && hash_size < MAX_HASH_SIZE)
545         Rehash();
546 }
547 
548 
549 //
550 // Remove all elements from the table.
551 //
SetEmpty()552 void TypeLookupTable::SetEmpty()
553 {
554     symbol_pool.Reset();
555     (void) memset(base, 0, hash_size * sizeof(TypeSymbol*));
556 }
557 
558 
559 int IntLiteralTable::int32_limit = 0x7FFFFFFF / 10;
560 unsigned IntLiteralTable::primes[] = {
561     DEFAULT_HASH_SIZE, 8191, 16411, MAX_HASH_SIZE
562 };
563 
IntLiteralTable(LiteralValue * bad_value_)564 IntLiteralTable::IntLiteralTable(LiteralValue* bad_value_)
565     : symbol_pool(16384),
566       hash_size(primes[0]),
567       prime_index(0),
568       bad_value(bad_value_)
569 {
570     base = (IntLiteralValue**) memset(new IntLiteralValue*[hash_size], 0,
571                                       hash_size * sizeof(IntLiteralValue*));
572     symbol_pool.Next() = NULL; // do not use the 0th element
573 }
574 
~IntLiteralTable()575 IntLiteralTable::~IntLiteralTable()
576 {
577     for (unsigned i = 0; i < symbol_pool.Length(); i++)
578         delete symbol_pool[i];
579     delete [] base;
580 }
581 
582 
FindOrInsertChar(LiteralSymbol * literal)583 LiteralValue* IntLiteralTable::FindOrInsertChar(LiteralSymbol* literal)
584 {
585     const wchar_t* name = literal -> Name() + 1;
586     int len = literal -> NameLength() - 2; // discard ''
587 
588     if (len <= 0) // An isolated or unterminated quote.
589         return literal -> value = bad_value;
590     if (len == 1) // A regular character.
591         return literal -> value = FindOrInsert((i4) name[0]);
592 
593     i4 value = -1;
594 
595     if (name[0] == U_BACKSLASH)
596         switch (name[1])
597         {
598         case U_b:
599             value = U_BACKSPACE;
600             break;
601         case U_f:
602             value = U_FORM_FEED;
603             break;
604         case U_n:
605             value = U_LINE_FEED;
606             break;
607         case U_r:
608             value = U_CARRIAGE_RETURN;
609             break;
610         case U_t:
611             value = U_HORIZONTAL_TAB;
612             break;
613         case U_DOUBLE_QUOTE:
614         case U_SINGLE_QUOTE:
615         case U_BACKSLASH:
616             value = name[1];
617             break;
618         case U_0:
619         case U_1:
620         case U_2:
621         case U_3:
622         case U_4:
623         case U_5:
624         case U_6:
625         case U_7:
626             {
627                 value = 0;
628                 int i = 0;
629                 while (++i < len)
630                     value = value * 8 + name[i] - U_0;
631             }
632         }
633     return literal -> value = (value < 0 || value > 65535 ? bad_value
634                                : FindOrInsert(value));
635 }
636 
637 
FindOrInsertHexInt(LiteralSymbol * literal)638 LiteralValue* IntLiteralTable::FindOrInsertHexInt(LiteralSymbol* literal)
639 {
640     const wchar_t* head = literal -> Name() + 1; // point to X
641     const wchar_t* tail = &literal -> Name()[literal -> NameLength() - 1];
642 
643     u4 uvalue = 0;
644 
645     for (++head; tail > head && *head == U_0; head++)
646         ; // skip leading zeroes
647     head--;
648 
649     for (int i = 0; i < 32 && tail > head; i += 4, tail--)
650         uvalue |= Code::Value(*tail) << i;
651     return tail > head ? bad_value : FindOrInsert((i4) uvalue);
652 }
653 
654 
FindOrInsertOctalInt(LiteralSymbol * literal)655 LiteralValue* IntLiteralTable::FindOrInsertOctalInt(LiteralSymbol* literal)
656 {
657     const wchar_t* head = literal -> Name(); // point to initial '0'
658     const wchar_t* tail = &head[literal -> NameLength() - 1];
659 
660     u4 uvalue = 0;
661     for (++head; tail > head && *head == U_0; head++) // skip leading zeroes
662         ;
663     head--;
664 
665     for (int i = 0; i < 30 && tail > head; i += 3, tail--)
666     {
667         u4 d = *tail - U_0;
668         uvalue |= (d << i);
669     }
670 
671     if (tail > head)
672     {
673         u4 d = *tail - U_0;
674 
675         if (d <= 3) // max number that can fit in 2 bits
676         {
677             tail--;
678             uvalue |= (d << 30);
679         }
680     }
681     return tail > head ? bad_value : FindOrInsert((i4) uvalue);
682 }
683 
684 
FindOrInsertInt(LiteralSymbol * literal)685 LiteralValue* IntLiteralTable::FindOrInsertInt(LiteralSymbol* literal)
686 {
687     const wchar_t* name = literal -> Name();
688 
689     if (name[0] == U_0)
690         literal -> value = (name[1] == U_x || name[1] == U_X
691                             ? FindOrInsertHexInt(literal)
692                             : FindOrInsertOctalInt(literal));
693     else
694     {
695         i4 value = 0;
696 
697         const wchar_t* p;
698         for (p = name; *p; p++)
699         {
700             int digit = *p - U_0;
701             if (value > int32_limit || (value == int32_limit && digit > 7))
702                 break;
703             value = value * 10 + digit;
704         }
705 
706         literal -> value = (*p ? bad_value : FindOrInsert(value));
707     }
708     return literal -> value;
709 }
710 
711 
FindOrInsertNegativeInt(LiteralSymbol * literal)712 LiteralValue* IntLiteralTable::FindOrInsertNegativeInt(LiteralSymbol* literal)
713 {
714     if (literal -> value && literal -> value != bad_value)
715     {
716         // A positive value already exists.
717         IntLiteralValue* int_literal = (IntLiteralValue*) literal -> value;
718         return FindOrInsert(- int_literal -> value);
719     }
720 
721     const wchar_t* name = literal -> Name();
722 
723     //
724     // We can assert that the name of a literal contains at least two
725     // characters: at least one digit and the terminating '\0'.
726     //
727     if (name[0] == U_0)
728     {
729         IntLiteralValue* int_literal =
730             (IntLiteralValue*) (name[1] == U_x || name[1] == U_X
731                                  ? FindOrInsertHexInt(literal)
732                                  : FindOrInsertOctalInt(literal));
733         return FindOrInsert(- int_literal -> value);
734     }
735 
736     i4 value = 0;
737 
738     const wchar_t* p;
739     for (p = name; *p; p++)
740     {
741         int digit = *p - U_0;
742         if (value > int32_limit || (value == int32_limit && digit > 8))
743             break;
744         value = value * 10 + digit;
745     }
746     return *p ? bad_value : FindOrInsert(- value);
747 }
748 
749 
Rehash()750 void IntLiteralTable::Rehash()
751 {
752     hash_size = primes[++prime_index];
753 
754     delete [] base;
755     base = (IntLiteralValue**) memset(new IntLiteralValue*[hash_size], 0,
756                                       hash_size * sizeof(IntLiteralValue*));
757 
758     //
759     // Recall that the 0th element is unused.
760     //
761     for (unsigned i = 1; i < symbol_pool.Length(); i++)
762     {
763         IntLiteralValue* ilv = symbol_pool[i];
764         // The unsigned casting turns the negative values into positive values.
765         int k = ((unsigned) ilv -> value) % hash_size;
766         ilv -> next = base[k];
767         base[k] = ilv;
768     }
769 }
770 
771 
Find(i4 value)772 IntLiteralValue* IntLiteralTable::Find(i4 value)
773 {
774     // The unsigned casting turns the negative values into positive values.
775     int k = ((unsigned) value) % hash_size;
776 
777     IntLiteralValue* lit = NULL;
778     for (lit = base[k]; lit; lit = (IntLiteralValue*) lit -> next)
779     {
780         if (lit -> value == value)
781             break;
782     }
783     return lit;
784 }
785 
786 
FindOrInsert(i4 value)787 IntLiteralValue* IntLiteralTable::FindOrInsert(i4 value)
788 {
789     // The unsigned casting turns the negative values into positive values.
790     int k = ((unsigned) value) % hash_size;
791 
792     IntLiteralValue* lit;
793     for (lit = base[k]; lit; lit = (IntLiteralValue*) lit -> next)
794     {
795         if (lit -> value == value)
796             return lit;
797     }
798 
799     lit = new IntLiteralValue();
800     lit -> Initialize(value, symbol_pool.Length());
801     symbol_pool.Next() = lit;
802 
803     lit -> next = base[k];
804     base[k] = lit;
805 
806     //
807     // If the number of unique elements in the hash table exceeds 2 times
808     // the size of the base, and we have not yet reached the maximum
809     // allowable size for a base, reallocate a larger base and rehash
810     // the elements.
811     //
812     if (symbol_pool.Length() > (hash_size << 1) && hash_size < MAX_HASH_SIZE)
813         Rehash();
814     return lit;
815 }
816 
817 
818 LongInt LongLiteralTable::int64_limit = LongInt(0x7FFFFFFF, 0xFFFFFFFF) / 10;
819 unsigned LongLiteralTable::primes[] = {
820     DEFAULT_HASH_SIZE, 2039, 4093, MAX_HASH_SIZE
821 };
822 
LongLiteralTable(LiteralValue * bad_value_)823 LongLiteralTable::LongLiteralTable(LiteralValue* bad_value_)
824     : symbol_pool(16384),
825       hash_size(primes[0]),
826       prime_index(0),
827       bad_value(bad_value_)
828 {
829     base = (LongLiteralValue**) memset(new LongLiteralValue*[hash_size], 0,
830                                        hash_size * sizeof(LongLiteralValue*));
831     symbol_pool.Next() = NULL; // do not use the 0th element
832 }
833 
~LongLiteralTable()834 LongLiteralTable::~LongLiteralTable()
835 {
836     for (unsigned i = 0; i < symbol_pool.Length(); i++)
837         delete symbol_pool[i];
838     delete [] base;
839 }
840 
841 
FindOrInsertHexLong(LiteralSymbol * literal)842 LiteralValue* LongLiteralTable::FindOrInsertHexLong(LiteralSymbol* literal)
843 {
844     u4 high = 0;
845     u4 low = 0;
846     const wchar_t* head = literal -> Name() + 1; // point to X
847     // -2 to skip the 'L' suffix
848     const wchar_t* tail = &literal -> Name()[literal -> NameLength() - 2];
849 
850     for (++head; tail > head && *head == U_0; head++) // skip leading zeroes
851         ;
852     head--;
853 
854     for (int i = 0; i < 32 && tail > head; i += 4, tail--)
855         low |= Code::Value(*tail) << i;
856     for (int j = 0; j < 32 && tail > head; j += 4, tail--)
857         high |= Code::Value(*tail) << j;
858     return tail > head ? bad_value : FindOrInsert(LongInt(high, low));
859 }
860 
861 
FindOrInsertOctalLong(LiteralSymbol * literal)862 LiteralValue* LongLiteralTable::FindOrInsertOctalLong(LiteralSymbol* literal)
863 {
864     const wchar_t* head = literal -> Name(); // point to initial '0'
865     // -2 to skip the 'L' suffix
866     const wchar_t* tail = &head[literal -> NameLength() - 2];
867 
868     ULongInt uvalue = 0;
869     for (++head; tail > head && *head == U_0; head++) // skip leading zeroes
870         ;
871     head--;
872 
873     for (int i = 0; i < 63 && tail > head; i += 3, tail--)
874     {
875         ULongInt d = (u4) (*tail - U_0);
876         uvalue |= (d << i);
877     }
878 
879     if (tail > head)
880     {
881         u4 d = *tail - U_0;
882 
883         if (d <= 1) // max number that can fit in 1 bit
884         {
885             tail--;
886             uvalue |= ULongInt((d << 31), 0);
887         }
888     }
889     return tail > head ? bad_value : FindOrInsert((LongInt) uvalue);
890 }
891 
892 
FindOrInsertLong(LiteralSymbol * literal)893 LiteralValue* LongLiteralTable::FindOrInsertLong(LiteralSymbol* literal)
894 {
895     const wchar_t* name = literal -> Name();
896 
897     //
898     // We can assert that the name of a literal contains at least two
899     // characters: at least one digit and the terminating '\0'.
900     //
901     if (name[0] == U_0)
902         literal -> value = (name[1] == U_x || name[1] == U_X
903                             ? FindOrInsertHexLong(literal)
904                             : FindOrInsertOctalLong(literal));
905     else
906     {
907         LongInt value = 0;
908 
909         const wchar_t* p;
910         for (p = name; *p != U_L && *p != U_l; p++)
911         {
912             u4 digit = *p - U_0;
913             if (value > int64_limit || (value == int64_limit && digit > 7))
914                 break;
915             value = value * 10 + digit;
916         }
917 
918         literal -> value = (*p != U_L && *p != U_l ? bad_value
919                             : FindOrInsert(value));
920     }
921     return literal -> value;
922 }
923 
924 
FindOrInsertNegativeLong(LiteralSymbol * literal)925 LiteralValue* LongLiteralTable::FindOrInsertNegativeLong(LiteralSymbol* literal)
926 {
927     // A positive value already exists.
928     if (literal -> value && literal -> value != bad_value)
929     {
930         LongLiteralValue* long_literal = (LongLiteralValue*) literal -> value;
931         return FindOrInsert(- long_literal -> value);
932     }
933 
934     const wchar_t* name = literal -> Name();
935     //
936     // We can assert that the name of a literal contains at least two
937     // characters: at least one digit and the terminating '\0'.
938     //
939     if (name[0] == U_0)
940     {
941         LongLiteralValue* long_literal =
942             (LongLiteralValue*) (name[1] == U_x || name[1] == U_X
943                                   ? FindOrInsertHexLong(literal)
944                                   : FindOrInsertOctalLong(literal));
945         return FindOrInsert(- long_literal -> value);
946     }
947 
948     LongInt value = 0;
949 
950     const wchar_t* p;
951     for (p = name; *p != U_L && *p != U_l && value >= 0; p++)
952     {
953         u4 digit = *p - U_0;
954         if (value > int64_limit || (value == int64_limit && digit > 8))
955             break;
956         value = value * 10 + digit;
957     }
958     return *p != U_L && *p != U_l ? bad_value : FindOrInsert(- value);
959 }
960 
961 
Rehash()962 void LongLiteralTable::Rehash()
963 {
964     hash_size = primes[++prime_index];
965 
966     delete [] base;
967     base = (LongLiteralValue**) memset(new LongLiteralValue*[hash_size], 0,
968                                        hash_size * sizeof(LongLiteralValue*));
969 
970     //
971     // Recall that the 0th element is unused.
972     //
973     for (unsigned i = 1; i < symbol_pool.Length(); i++)
974     {
975         LongLiteralValue* llv = symbol_pool[i];
976         // The hash function for LongInt values is cheap so we don't need to
977         // save it.
978         int k = Hash(llv -> value) % hash_size;
979         llv -> next = base[k];
980         base[k] = llv;
981     }
982 }
983 
984 
FindOrInsert(LongInt value)985 LongLiteralValue* LongLiteralTable::FindOrInsert(LongInt value)
986 {
987     int k = Hash(value) % hash_size;
988 
989     LongLiteralValue* lit;
990     for (lit = base[k]; lit; lit = (LongLiteralValue*) lit -> next)
991     {
992         if (lit -> value == value)
993             return lit;
994     }
995 
996     lit = new LongLiteralValue();
997     lit -> Initialize(value, symbol_pool.Length());
998     symbol_pool.Next() = lit;
999 
1000     lit -> next = base[k];
1001     base[k] = lit;
1002 
1003     //
1004     // If the number of unique elements in the hash table exceeds 2 times
1005     // the size of the base, and we have not yet reached the maximum
1006     // allowable size for a base, reallocate a larger base and rehash
1007     // the elements.
1008     //
1009     if (symbol_pool.Length() > (hash_size << 1) && hash_size < MAX_HASH_SIZE)
1010         Rehash();
1011     return lit;
1012 }
1013 
1014 
1015 unsigned FloatLiteralTable::primes[] = {
1016     DEFAULT_HASH_SIZE, 2039, 4093, MAX_HASH_SIZE
1017 };
1018 
FloatLiteralTable(LiteralValue * bad_value_)1019 FloatLiteralTable::FloatLiteralTable(LiteralValue* bad_value_)
1020     : symbol_pool(16384),
1021       hash_size(primes[0]),
1022       prime_index(0),
1023       bad_value(bad_value_)
1024 {
1025     base = (FloatLiteralValue**) memset(new FloatLiteralValue*[hash_size], 0,
1026                                         hash_size * sizeof(FloatLiteralValue*));
1027     symbol_pool.Next() = NULL; // do not use the 0th element
1028 }
1029 
~FloatLiteralTable()1030 FloatLiteralTable::~FloatLiteralTable()
1031 {
1032     for (unsigned i = 0; i < symbol_pool.Length(); i++)
1033         delete symbol_pool[i];
1034     delete [] base;
1035 }
1036 
1037 
FindOrInsertFloat(LiteralSymbol * literal)1038 LiteralValue* FloatLiteralTable::FindOrInsertFloat(LiteralSymbol* literal)
1039 {
1040     char* name = new char[literal -> NameLength() + 1];
1041     for (unsigned i = 0; i < literal -> NameLength(); i++)
1042         name[i] = (char) literal -> Name()[i];
1043     name[literal -> NameLength()] = U_NULL;
1044 
1045     //
1046     // JLS 3.10.2 states it is an error for a literal to round to infinity or 0
1047     // Passing the second parameter tells the constructor to set value to NaN
1048     // if the literal is invalid.
1049     //
1050     IEEEfloat value = IEEEfloat(name, true);
1051 
1052     literal -> value = (value.IsNaN() ? bad_value : FindOrInsert(value));
1053 
1054     delete [] name;
1055     return literal -> value;
1056 }
1057 
1058 
Rehash()1059 void FloatLiteralTable::Rehash()
1060 {
1061     hash_size = primes[++prime_index];
1062 
1063     delete [] base;
1064     base = (FloatLiteralValue**) memset(new FloatLiteralValue*[hash_size], 0,
1065                                         hash_size * sizeof(FloatLiteralValue*));
1066 
1067     //
1068     // Recall that the 0th element is unused.
1069     //
1070     for (unsigned i = 1; i < symbol_pool.Length(); i++)
1071     {
1072         FloatLiteralValue* flv = symbol_pool[i];
1073         // The hash function for float values is cheap so we don't need to
1074         // save it.
1075         int k = Hash(flv -> value) % hash_size;
1076         flv -> next = base[k];
1077         base[k] = flv;
1078     }
1079 }
1080 
1081 
FindOrInsert(IEEEfloat value)1082 FloatLiteralValue* FloatLiteralTable::FindOrInsert(IEEEfloat value)
1083 {
1084     int k = Hash(value) % hash_size;
1085 
1086     FloatLiteralValue* lit;
1087     for (lit = base[k]; lit; lit = (FloatLiteralValue*) lit -> next)
1088     {
1089         if (lit -> value.equals(value))
1090             return lit;
1091     }
1092 
1093     lit = new FloatLiteralValue();
1094     lit -> Initialize(value, symbol_pool.Length());
1095     symbol_pool.Next() = lit;
1096 
1097     lit -> next = base[k];
1098     base[k] = lit;
1099 
1100     //
1101     // If the number of unique elements in the hash table exceeds 2 times
1102     // the size of the base, and we have not yet reached the maximum
1103     // allowable size for a base, reallocate a larger base and rehash
1104     // the elements.
1105     //
1106     if (symbol_pool.Length() > (hash_size << 1) && hash_size < MAX_HASH_SIZE)
1107         Rehash();
1108     return lit;
1109 }
1110 
1111 
1112 unsigned DoubleLiteralTable::primes[] = {
1113     DEFAULT_HASH_SIZE, 2039, 4093, MAX_HASH_SIZE
1114 };
1115 
DoubleLiteralTable(LiteralValue * bad_value_)1116 DoubleLiteralTable::DoubleLiteralTable(LiteralValue* bad_value_)
1117     : symbol_pool(16384),
1118       hash_size(primes[0]),
1119       prime_index(0),
1120       bad_value(bad_value_)
1121 {
1122     base = (DoubleLiteralValue**) memset(new DoubleLiteralValue*[hash_size], 0,
1123                                          hash_size * sizeof(DoubleLiteralValue*));
1124     symbol_pool.Next() = NULL; // do not use the 0th element
1125 }
1126 
~DoubleLiteralTable()1127 DoubleLiteralTable::~DoubleLiteralTable()
1128 {
1129     for (unsigned i = 0; i < symbol_pool.Length(); i++)
1130         delete symbol_pool[i];
1131     delete [] base;
1132 }
1133 
1134 
FindOrInsertDouble(LiteralSymbol * literal)1135 LiteralValue* DoubleLiteralTable::FindOrInsertDouble(LiteralSymbol* literal)
1136 {
1137     char* name = new char[literal -> NameLength() + 1];
1138     for (unsigned i = 0; i < literal -> NameLength(); i++)
1139         name[i] = (char) literal -> Name()[i];
1140     name[literal -> NameLength()] = U_NULL;
1141 
1142     //
1143     // JLS 3.10.2 states it is an error for a literal to round to infinity or 0
1144     // Passing the second parameter tells the constructor to set value to NaN
1145     // if the literal is invalid.
1146     //
1147     IEEEdouble value = IEEEdouble(name, true);
1148 
1149     literal -> value = (value.IsNaN() ? bad_value : FindOrInsert(value));
1150 
1151     delete [] name;
1152     return literal -> value;
1153 }
1154 
1155 
Rehash()1156 void DoubleLiteralTable::Rehash()
1157 {
1158     hash_size = primes[++prime_index];
1159 
1160     delete [] base;
1161     base = (DoubleLiteralValue**) memset(new DoubleLiteralValue*[hash_size], 0,
1162                                          hash_size * sizeof(DoubleLiteralValue*));
1163 
1164     //
1165     // Recall that the 0th element is unused.
1166     //
1167     for (unsigned i = 1; i < symbol_pool.Length(); i++)
1168     {
1169         DoubleLiteralValue* dlv = symbol_pool[i];
1170         // The hash function for double values is cheap so we don't need to
1171         // save it.
1172         int k = Hash(dlv -> value) % hash_size;
1173         dlv -> next = base[k];
1174         base[k] = dlv;
1175     }
1176 }
1177 
1178 
FindOrInsert(IEEEdouble value)1179 DoubleLiteralValue* DoubleLiteralTable::FindOrInsert(IEEEdouble value)
1180 {
1181     int k = Hash(value) % hash_size;
1182 
1183     DoubleLiteralValue* lit;
1184     for (lit = base[k]; lit; lit = (DoubleLiteralValue*) lit -> next)
1185     {
1186         if (lit -> value.equals(value))
1187             return lit;
1188     }
1189 
1190     lit = new DoubleLiteralValue();
1191     lit -> Initialize(value, symbol_pool.Length());
1192     symbol_pool.Next() = lit;
1193 
1194     lit -> next = base[k];
1195     base[k] = lit;
1196 
1197     //
1198     // If the number of unique elements in the hash table exceeds 2 times
1199     // the size of the base, and we have not yet reached the maximum
1200     // allowable size for a base, reallocate a larger base and rehash
1201     // the elements.
1202     //
1203     if (symbol_pool.Length() > (hash_size << 1) && hash_size < MAX_HASH_SIZE)
1204         Rehash();
1205     return lit;
1206 }
1207 
1208 
FindOrInsertString(LiteralSymbol * literal)1209 LiteralValue* Utf8LiteralTable::FindOrInsertString(LiteralSymbol* literal)
1210 {
1211     const wchar_t* name = literal -> Name() + 1;
1212     int literal_length = literal -> NameLength() - 2; // discard ""
1213 
1214     // Big enough for the worst case: 3 bytes/char + \0.
1215     char* value = new char[literal_length * 3 + 1];
1216     int len = 0;
1217     int i = -1;
1218 
1219     while (++i < literal_length)
1220     {
1221         int ch = name[i];
1222         if (ch == U_BACKSLASH)
1223         {
1224             ch = 0;
1225             switch (name[++i])
1226             {
1227             case U_b:
1228                 ch = U_BACKSPACE;
1229                 break;
1230             case U_f:
1231                 ch = U_FORM_FEED;
1232                 break;
1233             case U_n:
1234                 ch = U_LINE_FEED;
1235                 break;
1236             case U_r:
1237                 ch = U_CARRIAGE_RETURN;
1238                 break;
1239             case U_t:
1240                 ch = U_HORIZONTAL_TAB;
1241                 break;
1242             case U_DOUBLE_QUOTE:
1243             case U_SINGLE_QUOTE:
1244             case U_BACKSLASH:
1245                 ch = name[i];
1246                 break;
1247             case U_0:
1248             case U_1:
1249             case U_2:
1250             case U_3:
1251                 ch = name[i] - U_0;
1252                 if (! Code::IsOctalDigit(name[i + 1]))
1253                     break;
1254                 i++;
1255                 // fallthrough
1256             case U_4:
1257             case U_5:
1258             case U_6:
1259             case U_7:
1260                 ch = ch * 8 + name[i] - U_0;
1261                 if (! Code::IsOctalDigit(name[i + 1]))
1262                     break;
1263                 ch = ch * 8 + name[++i] - U_0;
1264                 break;
1265             default:
1266                 ch = -1;
1267             }
1268         }
1269         else if (Code::IsNewline(ch))
1270             ch = -1;
1271 
1272         if (ch < 0)
1273             break;
1274         else if (ch == 0)
1275         {
1276              value[len++] = (char) 0xC0;
1277              value[len++] = (char) 0x80;
1278         }
1279         else if (ch <= 0x007F)
1280              value[len++] = (char) ch;
1281         else if (ch <= 0x07FF)
1282         {
1283              value[len++] = (char) (0x0C0 | ((ch >> 6) & 0x01F));
1284              value[len++] = (char) (0x080 | (ch & 0x03F));
1285         }
1286         else
1287         {
1288              value[len++] = (char) (0x0E0 | ((ch >> 12) & 0x0F));
1289              value[len++] = (char) (0x080 | ((ch >> 6) & 0x03F));
1290              value[len++] = (char) (0x080 | (ch & 0x03F));
1291         }
1292     }
1293 
1294     value[len] = U_NULL;
1295     literal -> value = (i < literal_length ? bad_value
1296                         : FindOrInsert(value, len));
1297 
1298     delete [] value;
1299     return literal -> value;
1300 }
1301 
1302 
FindOrInsert(wchar_t ch)1303 Utf8LiteralValue* Utf8LiteralTable::FindOrInsert(wchar_t ch)
1304 {
1305     int len = 0;
1306     char str[4];
1307 
1308     if (ch == 0)
1309     {
1310          str[len++] = (char) 0xC0;
1311          str[len++] = (char) 0x80;
1312     }
1313     else if (ch <= 0x007F)
1314          str[len++] = (char) ch;
1315     else if (ch <= 0x07FF)
1316     {
1317          str[len++] = (char) (0x0C0 | ((ch >> 6) & 0x01F));
1318          str[len++] = (char) (0x080 | (ch & 0x03F));
1319     }
1320     else
1321     {
1322          str[len++] = (char) (0x0E0 | (char) ((ch >> 12) & 0x0F));
1323          str[len++] = (char) (0x080 | (char) ((ch >> 6) & 0x03F));
1324          str[len++] = (char) (0x080 | (char) (ch & 0x03F));
1325     }
1326 
1327     str[len] = U_NULL;
1328     return FindOrInsert(str, len);
1329 }
1330 
1331 
Rehash()1332 void Utf8LiteralTable::Rehash()
1333 {
1334     hash_size = primes[++prime_index];
1335 
1336     delete [] base;
1337     base = (Utf8LiteralValue**) memset(new Utf8LiteralValue*[hash_size], 0,
1338                                        hash_size * sizeof(Utf8LiteralValue*));
1339 
1340     //
1341     // Recall that the 0th element is unused.
1342     //
1343     for (unsigned i = 1; i < symbol_pool.Length(); i++)
1344     {
1345         Utf8LiteralValue* ulv = symbol_pool[i];
1346         int k = ulv -> hash_address % hash_size;
1347         ulv -> next = base[k];
1348         base[k] = ulv;
1349     }
1350 }
1351 
1352 
1353 unsigned Utf8LiteralTable::primes[] = {
1354     DEFAULT_HASH_SIZE, 8191, 16411, MAX_HASH_SIZE
1355 };
1356 
Utf8LiteralTable(LiteralValue * bad_value_)1357 Utf8LiteralTable::Utf8LiteralTable(LiteralValue* bad_value_)
1358     : symbol_pool(16384),
1359       hash_size(primes[0]),
1360       prime_index(0),
1361       bad_value(bad_value_)
1362 {
1363     base = (Utf8LiteralValue**) memset(new Utf8LiteralValue*[hash_size], 0,
1364                                        hash_size * sizeof(Utf8LiteralValue*));
1365     symbol_pool.Next() = NULL; // do not use the 0th element
1366 }
1367 
1368 
~Utf8LiteralTable()1369 Utf8LiteralTable::~Utf8LiteralTable()
1370 {
1371     for (unsigned i = 0; i < symbol_pool.Length(); i++)
1372         delete symbol_pool[i];
1373     delete [] base;
1374 }
1375 
1376 
FindOrInsert(const char * str,int len)1377 Utf8LiteralValue* Utf8LiteralTable::FindOrInsert(const char* str, int len)
1378 {
1379     unsigned hash_address = Hash(str, len);
1380     int k = hash_address % hash_size;
1381 
1382     Utf8LiteralValue* lit;
1383     for (lit = base[k]; lit; lit = (Utf8LiteralValue*) lit -> next)
1384     {
1385         if (hash_address == lit -> hash_address &&
1386             len == lit -> length &&
1387             memcmp(lit -> value, str, len * sizeof(char)) == 0)
1388         {
1389             return lit;
1390         }
1391     }
1392 
1393     lit = new Utf8LiteralValue();
1394     lit -> Initialize(str, len, hash_address, symbol_pool.Length());
1395     symbol_pool.Next() = lit;
1396 
1397     lit -> next = base[k];
1398     base[k] = lit;
1399 
1400     //
1401     // If the number of unique elements in the hash table exceeds 2 times
1402     // the size of the base, and we have not yet reached the maximum
1403     // allowable size for a base, reallocate a larger base and rehash
1404     // the elements.
1405     //
1406     if (symbol_pool.Length() > (hash_size << 1) && hash_size < MAX_HASH_SIZE)
1407         Rehash();
1408     return lit;
1409 }
1410 
1411 
1412 //
1413 // Collapses all known strings in an expression chain into the leftmost one;
1414 // since the others in the chain have been set to "", this allows the emitter
1415 // to use a single call to StringBuffer.append() for the entire chain.
1416 //
CollectStrings()1417 void Utf8LiteralTable::CollectStrings()
1418 {
1419     unsigned count = utf8_literals -> Length();
1420     assert(count && leftmost_constant_expr);
1421     if (count == 1)
1422     {
1423         if (! leftmost_constant_expr -> NullLiteralCast())
1424             leftmost_constant_expr -> value = (*utf8_literals)[0];
1425     }
1426     else
1427     {
1428         int length = 0;
1429         for (unsigned i = 0; i < count; i++)
1430             length += (*utf8_literals)[i] -> length;
1431         char* str = new char[length + 1]; // +1 for '\0'
1432 
1433         int index = 0;
1434         for (unsigned k = 0; k < count; k++)
1435         {
1436             Utf8LiteralValue* literal = (*utf8_literals)[k];
1437             assert(literal -> value);
1438 
1439             memcpy(&str[index], literal -> value,
1440                    literal -> length * sizeof(char));
1441             index += literal -> length;
1442         }
1443         str[length] = U_NULL;
1444 
1445         leftmost_constant_expr -> value = FindOrInsert(str, length);
1446 
1447         delete [] str;
1448     }
1449     utf8_literals -> Reset();
1450     leftmost_constant_expr = NULL;
1451 }
1452 
1453 
1454 //
1455 // The return value is true iff leftmost_constant_expr != NULL; in other words,
1456 // if the current expression ends in a known String value which can be chained
1457 // to the next expression. As a side effect, if the expression is constant, it
1458 // is in the growing tuple of known strings seen so far; and if the expression
1459 // is not constant, all strings in the tuple are collected into the leftmost
1460 // constant of the previous chain.
1461 //
EndsInKnownString(AstExpression * expression)1462 bool Utf8LiteralTable::EndsInKnownString(AstExpression* expression)
1463 {
1464     if (expression -> IsConstant())
1465     {
1466         //
1467         // CollectStrings only works with Utf8LiteralValue* types, which
1468         // previous code in expr.cpp has already calculated. Here, we replace
1469         // constants with blank strings, and later we replace the left-most
1470         // constant with the concatenated version, so that expressions like
1471         // (nonconst + "a") + "b"; become (nonconst + "ab") + "";.  The
1472         // bytecode emitter is then smart enough to ignore the "".
1473         //
1474         Utf8LiteralValue* literal =
1475             DYNAMIC_CAST<Utf8LiteralValue*> (expression -> value);
1476         assert(literal -> value);
1477 
1478         utf8_literals -> Next() = literal;
1479         if (! leftmost_constant_expr)
1480             leftmost_constant_expr = expression;
1481         else
1482             expression -> value = FindOrInsert("", 0);
1483         return true;
1484     }
1485 
1486     AstBinaryExpression* binary_expr = expression -> BinaryExpressionCast();
1487     AstCastExpression* cast_expr = expression -> CastExpressionCast();
1488     AstParenthesizedExpression* paren_expr =
1489         expression -> ParenthesizedExpressionCast();
1490     AstNullLiteral* null_expr = expression -> NullLiteralCast();
1491     if (binary_expr)
1492     {
1493         //
1494         // If either subexpression is a constant but not a String, we have
1495         // already assigned it a Utf8LiteralValue.  But if a subexpression
1496         // is of type String, we don't know if it is constant yet.  Therefore,
1497         // we recurse to append the constant String for a primitive
1498         // expression, as well as to check if a String expression is constant.
1499         // This relies on the fact that this binary expression is of type
1500         // String. Remember that the null literal is not constant.
1501         //
1502         AstExpression* left  = binary_expr -> left_expression;
1503         AstExpression* right = binary_expr -> right_expression;
1504         if (left -> IsConstant() ||
1505             left -> Type() == expression -> Type())
1506         {
1507             EndsInKnownString(left);
1508         }
1509         if ((right -> IsConstant() ||
1510              right -> Type() == expression -> Type()) &&
1511             EndsInKnownString(right))
1512         {
1513             if (leftmost_constant_expr == left &&
1514                 ! left -> NullLiteralCast() && ! right -> NullLiteralCast())
1515             {
1516                 leftmost_constant_expr = binary_expr;
1517             }
1518             else
1519                 right -> symbol = expression -> Type();
1520             return true;
1521         }
1522     }
1523     else if (cast_expr && EndsInKnownString(cast_expr -> expression))
1524     {
1525         //
1526         // If we get here, the subexpression is necessarily a constant String;
1527         // but this cast is constant only if it is to type String.
1528         //
1529         if (leftmost_constant_expr == cast_expr -> expression &&
1530             cast_expr -> expression -> Type() == cast_expr -> Type())
1531         {
1532             leftmost_constant_expr = cast_expr;
1533         }
1534         return true;
1535     }
1536     else if (paren_expr && EndsInKnownString(paren_expr -> expression))
1537     {
1538         if (leftmost_constant_expr == paren_expr -> expression &&
1539             ! leftmost_constant_expr -> NullLiteralCast())
1540         {
1541             leftmost_constant_expr = paren_expr;
1542         }
1543         return true;
1544     }
1545     else if (null_expr)
1546     {
1547         //
1548         // We are careful that null is never given a string value unless it is
1549         // part of a chain of strings, as it is not a compile-time constant.
1550         //
1551         //ebb hack. This entire method probably belongs in Semantic, where
1552         // we have access to Control.
1553         static char null_literal[] = { U_n, U_u, U_l, U_l, U_NU };
1554         utf8_literals -> Next() = FindOrInsert(null_literal, 4);
1555         if (! leftmost_constant_expr)
1556             leftmost_constant_expr = expression;
1557         else expression -> value = FindOrInsert("", 0);
1558         return true;
1559     }
1560 
1561     if (leftmost_constant_expr)
1562         CollectStrings();
1563     return false; // Not a constant String expression
1564 }
1565 
1566 
1567 //
1568 // This method flattens all known String expressions in the tree into a minimal
1569 // number of utf8 literals. Note that it even flattens non-constant expressions
1570 // (such as (Object)"ab", or null), when there are no side effects which could
1571 // get in the way.  After this method, expression -> IsConstant() will return
1572 // the correct value, but some intermediate subexpressions may return a
1573 // harmless false negative.
1574 //
CheckStringConstant(AstExpression * expression)1575 void Utf8LiteralTable::CheckStringConstant(AstExpression* expression)
1576 {
1577     utf8_literals = new Tuple<Utf8LiteralValue*>(256);
1578     leftmost_constant_expr = NULL;
1579 
1580     if (EndsInKnownString(expression))
1581         CollectStrings();
1582 
1583     delete utf8_literals;
1584 }
1585 
1586 
1587 unsigned LiteralLookupTable::primes[] = {
1588     DEFAULT_HASH_SIZE, 2039, 4093, MAX_HASH_SIZE
1589 };
1590 
LiteralLookupTable()1591 LiteralLookupTable::LiteralLookupTable()
1592     : symbol_pool(16384),
1593       hash_size(primes[0]),
1594       prime_index(0)
1595 {
1596     base = (LiteralSymbol**) memset(new LiteralSymbol*[hash_size], 0,
1597                                     hash_size * sizeof(LiteralSymbol*));
1598 }
1599 
~LiteralLookupTable()1600 LiteralLookupTable::~LiteralLookupTable()
1601 {
1602     for (unsigned i = 0; i < symbol_pool.Length(); i++)
1603         delete symbol_pool[i];
1604     delete [] base;
1605 }
1606 
1607 
Rehash()1608 void LiteralLookupTable::Rehash()
1609 {
1610     hash_size = primes[++prime_index];
1611 
1612     delete [] base;
1613     base = (LiteralSymbol**) memset(new LiteralSymbol*[hash_size], 0,
1614                                     hash_size * sizeof(LiteralSymbol*));
1615 
1616     for (unsigned i = 0; i < symbol_pool.Length(); i++)
1617     {
1618         LiteralSymbol* ls = symbol_pool[i];
1619         int k = ls -> hash_address % hash_size;
1620         ls -> next = base[k];
1621         base[k] = ls;
1622     }
1623 }
1624 
1625 
FindOrInsertLiteral(const wchar_t * str,unsigned len)1626 LiteralSymbol* LiteralLookupTable::FindOrInsertLiteral(const wchar_t* str,
1627                                                        unsigned len)
1628 {
1629     unsigned hash_address = Hash(str, len);
1630     int k = hash_address % hash_size;
1631     LiteralSymbol* symbol;
1632     for (symbol = base[k]; symbol; symbol = (LiteralSymbol*) symbol -> next)
1633     {
1634         if (hash_address == symbol -> hash_address &&
1635             len == symbol -> NameLength() &&
1636             memcmp(symbol -> Name(), str, len * sizeof(wchar_t)) == 0)
1637         {
1638             return symbol;
1639         }
1640     }
1641 
1642     symbol = new LiteralSymbol();
1643     symbol_pool.Next() = symbol;
1644     symbol -> Initialize(str, hash_address, len);
1645 
1646     symbol -> next = base[k];
1647     base[k] = symbol;
1648 
1649     //
1650     // If the number of unique elements in the hash table exceeds 2 times
1651     // the size of the base, and we have not yet reached the maximum
1652     // allowable size for a base, reallocate a larger base and rehash
1653     // the elements.
1654     //
1655     if (symbol_pool.Length() > (hash_size << 1) && hash_size < MAX_HASH_SIZE)
1656         Rehash();
1657     return symbol;
1658 }
1659 
Contains(wchar_t character) const1660 bool NameSymbol::Contains(wchar_t character) const
1661 {
1662     for (wchar_t* ptr = name_; *ptr; ptr++)
1663     {
1664         if (*ptr == character)
1665             return true;
1666     }
1667     return false;
1668 }
1669 
1670 //
1671 // JLS2 6.8 describes the well-established Java naming conventions.
1672 // See also "Effective Java", item 38.
1673 //
1674 
IsBadStyleForClass() const1675 bool NameSymbol::IsBadStyleForClass() const
1676 {
1677     // JLS2 6.8.2
1678     return Code::IsAsciiLower(*name_) || Contains(U_UNDERSCORE);
1679 }
1680 
IsBadStyleForConstantField() const1681 bool NameSymbol::IsBadStyleForConstantField() const
1682 {
1683     // JLS2 6.8.5
1684     for (wchar_t* ptr = name_; *ptr; ptr++)
1685     {
1686         if (Code::IsAsciiLower(*ptr))
1687             return true;
1688     }
1689     return false;
1690 }
1691 
IsBadStyleForField() const1692 bool NameSymbol::IsBadStyleForField() const
1693 {
1694     // JLS2 6.8.4
1695     return IsBadStyleForVariable();
1696 }
1697 
IsBadStyleForMethod() const1698 bool NameSymbol::IsBadStyleForMethod() const
1699 {
1700     // JLS2 6.8.3
1701     return IsBadStyleForVariable();
1702 }
1703 
IsBadStyleForVariable() const1704 bool NameSymbol::IsBadStyleForVariable() const
1705 {
1706     // JLS2 6.8.3, 6.8.4, 6.8.6
1707     return Code::IsAsciiUpper(*name_) || Contains(U_UNDERSCORE);
1708 }
1709 
1710 #ifdef HAVE_JIKES_NAMESPACE
1711 } // Close namespace Jikes block
1712 #endif
1713 
1714