1 // $Id: parser.cpp,v 1.28 2004/03/25 13:32:28 ericb Exp $
2 //
3 // This software is subject to the terms of the IBM Jikes Compiler
4 // License Agreement available at the following URL:
5 // http://ibm.com/developerworks/opensource/jikes.
6 // Copyright (C) 1996, 2004 IBM Corporation and others.  All Rights Reserved.
7 // You must accept the terms of that agreement to use this software.
8 //
9 
10 #include "parser.h"
11 #include "ast.h"
12 #include "stream.h"
13 
14 #ifdef HAVE_JIKES_NAMESPACE
15 namespace Jikes { // Open namespace Jikes block
16 #endif
17 
ReallocateStacks()18 void Parser::ReallocateStacks()
19 {
20     int old_stack_length = stack_length;
21 
22     stack_length += STACK_INCREMENT;
23 
24     assert(stack_length <= SHRT_MAX);
25 
26     int* old_stack = stack;
27     stack = (int*) memcpy(new int[stack_length], stack,
28                           old_stack_length * sizeof(int));
29     delete [] old_stack;
30 
31     Location* old_location_stack = location_stack;
32     location_stack = (Location*) memcpy(new Location[stack_length],
33                                         location_stack,
34                                         old_stack_length * sizeof(Location));
35     delete [] old_location_stack;
36 
37     Ast** old_parse_stack = parse_stack;
38     parse_stack = (Ast**) memcpy(new Ast*[stack_length], parse_stack,
39                                  old_stack_length * sizeof(Ast*));
40     delete [] old_parse_stack;
41     // The first time through, we initialize parse_stack[0] to NULL.
42     parse_stack[old_stack_length] = NULL;
43 
44     int* old_temp_stack = temp_stack;
45     temp_stack = (int*) memcpy(new int[stack_length], temp_stack,
46                                old_stack_length * sizeof(int));
47     delete [] old_temp_stack;
48 }
49 
50 
AllocateListNode()51 AstListNode *Parser::AllocateListNode()
52 {
53     AstListNode *p;
54 
55     if (free_list_nodes)
56     {
57         p = free_list_nodes;
58         free_list_nodes = free_list_nodes -> next;
59     }
60     else p = list_node_pool -> NewListNode();
61 
62     return p;
63 }
64 
65 
FreeCircularList(AstListNode * tail)66 void Parser::FreeCircularList(AstListNode* tail)
67 {
68     if (tail)
69     {
70         AstListNode* root = tail -> next;
71         tail -> next = free_list_nodes;
72         free_list_nodes = root;
73     }
74 }
75 
76 
PackageHeaderParse(LexStream * lex_stream_,StoragePool * ast_pool_)77 AstPackageDeclaration* Parser::PackageHeaderParse(LexStream* lex_stream_,
78                                                   StoragePool* ast_pool_)
79 {
80     AstPackageDeclaration* package_declaration = NULL;
81     lex_stream_ -> Reset();
82 
83     if (lex_stream_ -> PackageToken())
84     {
85         ast_pool = ast_pool_;
86         list_node_pool = new StoragePool(lex_stream_ -> NumTokens());
87         free_list_nodes = NULL;
88 
89         parse_package_header_only = true;
90         // We are parsing the whole input and not just a segment.
91         end_token = LexStream::LEX_INFINITY;
92         lex_stream = lex_stream_;
93         Ast *ast = HeaderParse();
94         parse_package_header_only = false;
95 
96         if (ast)
97         {
98             AstCompilationUnit *compilation_unit =
99                 ast -> CompilationUnitCast();
100             if (compilation_unit &&
101                 ! compilation_unit -> BadCompilationUnitCast())
102             {
103                 package_declaration =
104                     compilation_unit -> package_declaration_opt;
105             }
106         }
107 
108         delete list_node_pool;
109     }
110 
111     return package_declaration;
112 }
113 
114 
HeaderParse(LexStream * lex_stream_,StoragePool * ast_pool_)115 AstCompilationUnit* Parser::HeaderParse(LexStream* lex_stream_,
116                                         StoragePool* ast_pool_)
117 {
118     lex_stream_ -> Reset();
119 
120     body_pool = new StoragePool(lex_stream_ -> NumTokens());
121     ast_pool = (ast_pool_ ? ast_pool_ : body_pool);
122     list_node_pool = new StoragePool(lex_stream_ -> NumTokens());
123     free_list_nodes = NULL;
124     AstCompilationUnit *compilation_unit = NULL;
125 
126     parse_header_only = true;
127     // We are parsing the whole input and not just a segment.
128     end_token = LexStream::LEX_INFINITY;
129     lex_stream = lex_stream_;
130     Ast *ast = HeaderParse();
131     parse_header_only = false;
132 
133     if (ast)
134     {
135         compilation_unit = ast -> CompilationUnitCast();
136         if (compilation_unit &&
137             ! compilation_unit -> BadCompilationUnitCast())
138         {
139             if (compilation_unit -> NumTypeDeclarations() == 0)
140                 compilation_unit -> MarkEmpty();
141         }
142     }
143 
144     //
145     // If we succesfully parsed a compilation unit, allocate a storage pool
146     // for it. Subtract the amount of space that's already been allocated for
147     // the headers from the estimate for the bodies.
148     //
149     if (compilation_unit)
150          compilation_unit -> ast_pool = body_pool;
151     else delete body_pool;
152 
153     delete list_node_pool; // free the pool of list nodes
154 
155     return compilation_unit;
156 }
157 
158 
HeaderParse()159 Ast* Parser::HeaderParse()
160 {
161     TokenObject curtok = lex_stream -> Gettoken();
162     int act = START_STATE,
163               current_kind = lex_stream -> Kind(curtok);
164 
165 /*****************************************************************/
166 /* Start parsing.                                                */
167 /*****************************************************************/
168     state_stack_top = -1;
169 
170     //
171     // Process a terminal
172     //
173     while (true)
174     {
175         if (++state_stack_top >= stack_length)
176             ReallocateStacks();
177 
178         stack[state_stack_top] = act;
179         location_stack[state_stack_top] = Loc(curtok);
180 
181         act = t_action(act, current_kind, lex_stream);
182 
183         if (act <= NUM_RULES)
184             state_stack_top--; // make reduction look like a shift-reduce
185         else if (act > ERROR_ACTION)
186         {
187             curtok = lex_stream -> Gettoken();
188             current_kind = lex_stream -> Kind(curtok);
189 
190             act -= ERROR_ACTION;
191         }
192         else if (act < ACCEPT_ACTION)
193         {
194             curtok = lex_stream -> Gettoken();
195             current_kind = lex_stream -> Kind(curtok);
196 
197             continue;
198         }
199         else break;
200 
201         //
202         // Process a non_terminal
203         //
204         do
205         {
206             state_stack_top -= (rhs[act] - 1);
207             (this ->* rule_action[act])();
208             act = nt_action(stack[state_stack_top], lhs[act]);
209         } while (act <= NUM_RULES);
210     } /* process_terminal */
211 
212     if (act == ERROR_ACTION)
213     {
214         //
215         // If any error is found in a package declaration, do not try to
216         // repair it.
217         //
218         if (! parse_package_header_only)
219             RepairParse(curtok);
220 
221         if (parse_stack[0] && parse_stack[0] -> CompilationUnitCast())
222             ((AstCompilationUnit*) parse_stack[0]) -> MarkBad();
223         else parse_stack[0] = NULL;
224     }
225 
226     return parse_stack[0];
227 }
228 
229 
BodyParse(LexStream * lex_stream_,AstClassBody * class_body)230 bool Parser::BodyParse(LexStream* lex_stream_, AstClassBody* class_body)
231 {
232     assert(class_body -> UnparsedClassBodyCast());
233 
234     lex_stream = lex_stream_;
235     ast_pool = class_body -> pool;
236     body_pool = class_body -> pool;
237     list_node_pool = new StoragePool(lex_stream_ -> NumTokens());
238     free_list_nodes = NULL;
239 
240     bool success = Body(class_body);
241 
242     delete list_node_pool; // free the pool of list nodes
243 
244     class_body -> MarkParsed();
245 
246     return success;
247 }
248 
249 
Body(AstClassBody * class_body)250 bool Parser::Body(AstClassBody* class_body)
251 {
252     bool errors_detected = false;
253     unsigned i;
254 
255     for (i = 0; i < class_body -> NumConstructors(); i++)
256     {
257         AstConstructorDeclaration* constructor_decl =
258             class_body -> Constructor(i);
259 
260         if (constructor_decl -> constructor_symbol)
261         {
262             AstMethodBody* block = constructor_decl -> constructor_body;
263             end_token = block -> right_brace_token; // last token in the body
264 
265             AstMethodBody* new_body = ParseSegment(block -> left_brace_token);
266 
267             if (! new_body)
268                 errors_detected = true;
269             else constructor_decl -> constructor_body = new_body;
270         }
271     }
272 
273     for (i = 0; i < class_body -> NumMethods(); i++)
274     {
275         AstMethodDeclaration* method_decl = class_body -> Method(i);
276         if (method_decl -> method_symbol && method_decl -> method_body_opt)
277         {
278             AstMethodBody* block = method_decl -> method_body_opt;
279             end_token = block -> right_brace_token;
280             AstMethodBody* new_block = ParseSegment(block -> left_brace_token);
281             if (! new_block) // a bad block ?
282                 errors_detected = true;
283             else method_decl -> method_body_opt = new_block;
284         }
285     }
286 
287     for (i = 0; i < class_body -> NumNestedClasses(); i++)
288         errors_detected = errors_detected ||
289             ! Body(class_body -> NestedClass(i) -> class_body);
290     for (i = 0; i < class_body -> NumNestedInterfaces(); i++)
291         errors_detected = errors_detected ||
292             ! Body(class_body -> NestedInterface(i) -> class_body);
293     return ! errors_detected;
294 }
295 
296 
InitializerParse(LexStream * stream,AstClassBody * class_body)297 bool Parser::InitializerParse(LexStream* stream, AstClassBody* class_body)
298 {
299     lex_stream = stream;
300     ast_pool = class_body -> pool;
301     body_pool = class_body -> pool;
302     list_node_pool = new StoragePool(stream -> NumTokens());
303     free_list_nodes = NULL;
304 
305     bool success = Initializer(class_body);
306 
307     delete list_node_pool; // free the pool of list nodes
308     return success;
309 }
310 
311 
Initializer(AstClassBody * class_body)312 bool Parser::Initializer(AstClassBody* class_body)
313 {
314     bool errors_detected = false;
315     unsigned i;
316 
317     for (i = 0; i < class_body -> NumStaticInitializers(); i++)
318     {
319          AstMethodBody* block = class_body -> StaticInitializer(i) -> block;
320          end_token = block -> right_brace_token; // last token in the body
321          class_body -> StaticInitializer(i) -> block =
322              ParseSegment(block -> left_brace_token);
323         if (! class_body -> StaticInitializer(i) -> block)
324         {
325             errors_detected = true;
326             // Restore old empty block.
327             class_body -> StaticInitializer(i) -> block = block;
328         }
329     }
330     for (i = 0; i < class_body -> NumInstanceInitializers(); i++)
331     {
332         AstMethodBody* block = class_body -> InstanceInitializer(i) -> block;
333         end_token = block -> right_brace_token; // last token in the body
334         class_body -> InstanceInitializer(i) -> block =
335             ParseSegment(block -> left_brace_token);
336         if (! class_body -> InstanceInitializer(i) -> block)
337         {
338             errors_detected = true;
339             // Restore old empty block.
340             class_body -> InstanceInitializer(i) -> block = block;
341         }
342     }
343     for (i = 0; i < class_body -> NumNestedClasses(); i++)
344         errors_detected = errors_detected ||
345             ! Initializer(class_body -> NestedClass(i) -> class_body);
346     for (i = 0; i < class_body -> NumNestedInterfaces(); i++)
347         errors_detected = errors_detected ||
348             ! Initializer(class_body -> NestedInterface(i) -> class_body);
349     return ! errors_detected;
350 }
351 
352 
ParseSegment(TokenObject start_token)353 AstMethodBody* Parser::ParseSegment(TokenObject start_token)
354 {
355     //
356     // The next call to Gettoken will return the start_token.
357     // However, we initialize curtok to start_token in order
358     // to obtain a valid location for the BodyMarker.
359     //
360     lex_stream -> Reset(start_token);
361     TokenObject curtok = start_token; // get the location of the start_token
362     int act = START_STATE,
363               current_kind = TK_BodyMarker;
364 
365 /*****************************************************************/
366 /* Start parsing.                                                */
367 /*****************************************************************/
368     state_stack_top = -1;
369 
370     //
371     // Process a terminal
372     //
373     while (true)
374     {
375         if (++state_stack_top >= stack_length)
376             ReallocateStacks();
377 
378         stack[state_stack_top] = act;
379         location_stack[state_stack_top] = Loc(curtok);
380 
381         act = t_action(act, current_kind, lex_stream);
382 
383         if (act <= NUM_RULES)
384             state_stack_top--; // make reduction look like a shift-reduce
385         else if (act > ERROR_ACTION)
386         {
387             curtok = lex_stream -> Gettoken(end_token);
388             current_kind = lex_stream -> Kind(curtok);
389 
390             act -= ERROR_ACTION;
391         }
392         else if (act < ACCEPT_ACTION)
393         {
394             curtok = lex_stream -> Gettoken(end_token);
395             current_kind = lex_stream -> Kind(curtok);
396 
397             continue;
398         }
399         else break;
400 
401         //
402         // Process a nonterminal
403         //
404         do
405         {
406             state_stack_top -= (rhs[act] - 1);
407             (this ->* rule_action[act])();
408             act = nt_action(stack[state_stack_top], lhs[act]);
409         } while (act <= NUM_RULES);
410     } /* process_terminal */
411 
412     if (act == ERROR_ACTION)
413     {
414         RepairParse(curtok);
415 
416         parse_stack[0] = NULL;
417     }
418 
419     return DYNAMIC_CAST<AstMethodBody *> (parse_stack[0]);
420 }
421 
422 
RepairParse(TokenObject curtok)423 void Parser::RepairParse(TokenObject curtok)
424 {
425     //
426     // Repair an error
427     //
428     while (true)
429     {
430         //
431         // Pop state stack up to first state that had an
432         // action on the error token.  The net effect is to
433         // remove all default reductions on an empty rule
434         // caused by the error token.
435         //
436         int k;
437         for (k = state_stack_top - 1;
438              k >= 0 && location_stack[k] == Loc(curtok); k--);
439         k++;
440 
441         state_stack_top = k;
442 
443         ErrorRepair(curtok);
444 
445         curtok = lex_stream -> Gettoken(end_token);
446         int act = stack[state_stack_top--];
447         int current_kind = lex_stream -> Kind(curtok);
448 
449         //
450         // Process a terminal
451         //
452         while (true)
453         {
454             if (++state_stack_top >= stack_length)
455                  ReallocateStacks();
456 
457             stack[state_stack_top] = act;
458             location_stack[state_stack_top] = Loc(curtok);
459 
460             act = t_action(act, current_kind, lex_stream);
461 
462             if (act <= NUM_RULES)
463                 state_stack_top--; // make reduction look like a shift-reduce
464             else if (act > ERROR_ACTION)
465             {
466                 curtok = lex_stream -> Gettoken(end_token);
467                 current_kind = lex_stream -> Kind(curtok);
468 
469                 act -= ERROR_ACTION;
470             }
471             else if (act < ACCEPT_ACTION)
472             {
473                 curtok = lex_stream -> Gettoken(end_token);
474                 current_kind = lex_stream -> Kind(curtok);
475 
476                 continue;
477             }
478             //
479             // Since the null string is a valid Java program, this function
480             // will always succeed even if it has to delete the whole input.
481             //
482             else if (act == ACCEPT_ACTION)
483                  return;
484             else
485                 // loop around and keep trying until something is accepted.
486                 break;
487 
488             //
489             // Process a nonterminal
490             //
491             do
492             {
493                 state_stack_top -= (rhs[act] - 1);
494                 (this ->* rule_action[act])();
495                 act = nt_action(stack[state_stack_top], lhs[act]);
496             } while (act <= NUM_RULES);
497         } /* process_terminal */
498     }
499 }
500 
501 
502 //
503 // This routine is invoked when an error is encountered in a "repair"
504 // parser. It will place the parser back into a working configuration
505 // by adjusting the state stack, the current token and the buffer.
506 //
ErrorRepair(TokenObject error_token)507 void Parser::ErrorRepair(TokenObject error_token)
508 {
509     SecondaryRepairInfo repair;
510 
511     repair.code = ERROR_CODE;
512     do
513     {
514         repair.distance = 0;
515         repair.num_deletions = state_stack_top + BUFF_UBOUND;
516 
517         buffer[1] = error_token;
518         buffer[0] = lex_stream -> Previous(buffer[1]);
519 
520         for (int k = 2; k < BUFF_SIZE; k++)
521             buffer[k] = lex_stream -> Next(buffer[k - 1]);
522 
523         int last_index;
524         for (last_index = MAX_DISTANCE - 1;
525              last_index >= 1 &&
526                  lex_stream -> Kind(buffer[last_index]) == EOFT_SYMBOL;
527              last_index--);
528         last_index++;
529 
530         repair = ErrorSurgery(stack, state_stack_top, last_index, repair);
531         error_token = buffer[MAX_DISTANCE - MIN_DISTANCE + 2];
532     } while (repair.code == 0);
533 
534     state_stack_top = repair.stack_position;
535     lex_stream -> Reset(buffer[repair.buffer_position]);
536 }
537 
538 
539 //
540 // Keep cutting "stuff" out around the error until a stable configuration
541 // is found.
542 //
ErrorSurgery(int stck[],int stack_top,int last_index,SecondaryRepairInfo repair)543 SecondaryRepairInfo Parser::ErrorSurgery
544          (int stck[], int stack_top,
545           int last_index, SecondaryRepairInfo repair)
546 {
547     int stack_deletions = 0;
548     Location  previous_loc = Loc(buffer[2]);
549 
550     for (int top = stack_top;
551          top >= 0 && repair.num_deletions >= stack_deletions; top--)
552     {
553         if (location_stack[top] < previous_loc)
554             stack_deletions++;
555         previous_loc = location_stack[top];
556 
557         for (int i = 1;
558              i <= last_index &&
559                  repair.num_deletions >= (stack_deletions + i - 1); i++)
560         {
561             int j = ParseCheck(stck, top, lex_stream -> Kind(buffer[i]),
562                                i + 1);
563 
564             if ((j - i + 1) > MIN_DISTANCE)
565             {
566                 int k = stack_deletions + i - 1;
567                 if ((k < repair.num_deletions) ||
568                     (j - k) > (repair.distance - repair.num_deletions))
569                 {
570                     repair.code = DELETION_CODE;
571                     repair.distance = j;
572                     repair.stack_position = top;
573                     repair.buffer_position = i;
574                     repair.num_deletions = k;
575                 }
576             }
577         }
578     }
579 
580     return repair;
581 }
582 
583 
584 /*****************************************************************/
585 /* Try to parse until first_token and all tokens in BUFFER have  */
586 /* been consumed, or an error is encountered. Return the number  */
587 /* of tokens that were expended before the parse blocked.        */
588 /*****************************************************************/
ParseCheck(int stck[],int stack_top,int first_token,int buffer_position)589 int Parser::ParseCheck(int stck[], int stack_top, int first_token,
590                        int buffer_position)
591 {
592     int max_pos,
593         indx,
594         ct,
595         act,
596         lhs_symbol;
597 
598 /*****************************************************************/
599 /* Initialize pointer for temp_stack and initialize maximum      */
600 /* position of state stack that is still useful.                 */
601 /*****************************************************************/
602     act = stck[stack_top];
603     if (first_token > NT_OFFSET)
604     {
605         temp_stack_top = stack_top;
606         max_pos = stack_top;
607         indx = buffer_position;
608         ct = lex_stream -> Kind(buffer[indx]);
609         lex_stream -> Reset(lex_stream -> Next(buffer[indx]));
610         lhs_symbol = first_token - NT_OFFSET;
611         act = nt_action(act, lhs_symbol);
612         if (act <= NUM_RULES)
613             goto process_non_terminal;
614     }
615     else
616     {
617         temp_stack_top = stack_top - 1;
618         max_pos = temp_stack_top;
619         indx = buffer_position - 1;
620         ct = first_token;
621         lex_stream -> Reset(buffer[buffer_position]);
622     }
623 
624  process_terminal:
625     while (true)
626     {
627         if (++temp_stack_top >= stack_length)  /* Stack overflow!!! */
628             return indx;
629         temp_stack[temp_stack_top] = act;
630 
631         act = t_action(act, ct, lex_stream);
632 
633         if (act <= NUM_RULES)                   /* reduce action */
634             temp_stack_top--;
635         else if (act < ACCEPT_ACTION ||          /* shift action */
636                  act > ERROR_ACTION)        /*shift-reduce action*/
637         {
638             if (indx == MAX_DISTANCE)
639                 return indx;
640             indx++;
641             ct = lex_stream -> Kind(buffer[indx]);
642             lex_stream -> Reset(lex_stream -> Next(buffer[indx]));
643             if (act > ERROR_ACTION)
644                  act -= ERROR_ACTION;
645             else goto process_terminal;
646         }
647         else if (act == ACCEPT_ACTION)           /* accept action */
648              return MAX_DISTANCE;
649         else return indx;                         /* error action */
650 
651         process_non_terminal:
652         do
653         {
654             temp_stack_top -= (rhs[act]-1);
655             lhs_symbol = lhs[act];
656             act = (temp_stack_top > max_pos
657                                   ? temp_stack[temp_stack_top]
658                                   : stck[temp_stack_top]);
659             act = nt_action(act, lhs_symbol);
660         } while (act <= NUM_RULES);
661 
662         max_pos = Min(max_pos, temp_stack_top);
663     } // process_terminal;
664 
665     return 0;
666 }
667 
668 #ifdef HAVE_JIKES_NAMESPACE
669 } // Close namespace Jikes block
670 #endif
671 
672