1 // $Id: parser.cpp,v 1.28 2004/03/25 13:32:28 ericb Exp $
2 //
3 // This software is subject to the terms of the IBM Jikes Compiler
4 // License Agreement available at the following URL:
5 // http://ibm.com/developerworks/opensource/jikes.
6 // Copyright (C) 1996, 2004 IBM Corporation and others. All Rights Reserved.
7 // You must accept the terms of that agreement to use this software.
8 //
9
10 #include "parser.h"
11 #include "ast.h"
12 #include "stream.h"
13
14 #ifdef HAVE_JIKES_NAMESPACE
15 namespace Jikes { // Open namespace Jikes block
16 #endif
17
ReallocateStacks()18 void Parser::ReallocateStacks()
19 {
20 int old_stack_length = stack_length;
21
22 stack_length += STACK_INCREMENT;
23
24 assert(stack_length <= SHRT_MAX);
25
26 int* old_stack = stack;
27 stack = (int*) memcpy(new int[stack_length], stack,
28 old_stack_length * sizeof(int));
29 delete [] old_stack;
30
31 Location* old_location_stack = location_stack;
32 location_stack = (Location*) memcpy(new Location[stack_length],
33 location_stack,
34 old_stack_length * sizeof(Location));
35 delete [] old_location_stack;
36
37 Ast** old_parse_stack = parse_stack;
38 parse_stack = (Ast**) memcpy(new Ast*[stack_length], parse_stack,
39 old_stack_length * sizeof(Ast*));
40 delete [] old_parse_stack;
41 // The first time through, we initialize parse_stack[0] to NULL.
42 parse_stack[old_stack_length] = NULL;
43
44 int* old_temp_stack = temp_stack;
45 temp_stack = (int*) memcpy(new int[stack_length], temp_stack,
46 old_stack_length * sizeof(int));
47 delete [] old_temp_stack;
48 }
49
50
AllocateListNode()51 AstListNode *Parser::AllocateListNode()
52 {
53 AstListNode *p;
54
55 if (free_list_nodes)
56 {
57 p = free_list_nodes;
58 free_list_nodes = free_list_nodes -> next;
59 }
60 else p = list_node_pool -> NewListNode();
61
62 return p;
63 }
64
65
FreeCircularList(AstListNode * tail)66 void Parser::FreeCircularList(AstListNode* tail)
67 {
68 if (tail)
69 {
70 AstListNode* root = tail -> next;
71 tail -> next = free_list_nodes;
72 free_list_nodes = root;
73 }
74 }
75
76
PackageHeaderParse(LexStream * lex_stream_,StoragePool * ast_pool_)77 AstPackageDeclaration* Parser::PackageHeaderParse(LexStream* lex_stream_,
78 StoragePool* ast_pool_)
79 {
80 AstPackageDeclaration* package_declaration = NULL;
81 lex_stream_ -> Reset();
82
83 if (lex_stream_ -> PackageToken())
84 {
85 ast_pool = ast_pool_;
86 list_node_pool = new StoragePool(lex_stream_ -> NumTokens());
87 free_list_nodes = NULL;
88
89 parse_package_header_only = true;
90 // We are parsing the whole input and not just a segment.
91 end_token = LexStream::LEX_INFINITY;
92 lex_stream = lex_stream_;
93 Ast *ast = HeaderParse();
94 parse_package_header_only = false;
95
96 if (ast)
97 {
98 AstCompilationUnit *compilation_unit =
99 ast -> CompilationUnitCast();
100 if (compilation_unit &&
101 ! compilation_unit -> BadCompilationUnitCast())
102 {
103 package_declaration =
104 compilation_unit -> package_declaration_opt;
105 }
106 }
107
108 delete list_node_pool;
109 }
110
111 return package_declaration;
112 }
113
114
HeaderParse(LexStream * lex_stream_,StoragePool * ast_pool_)115 AstCompilationUnit* Parser::HeaderParse(LexStream* lex_stream_,
116 StoragePool* ast_pool_)
117 {
118 lex_stream_ -> Reset();
119
120 body_pool = new StoragePool(lex_stream_ -> NumTokens());
121 ast_pool = (ast_pool_ ? ast_pool_ : body_pool);
122 list_node_pool = new StoragePool(lex_stream_ -> NumTokens());
123 free_list_nodes = NULL;
124 AstCompilationUnit *compilation_unit = NULL;
125
126 parse_header_only = true;
127 // We are parsing the whole input and not just a segment.
128 end_token = LexStream::LEX_INFINITY;
129 lex_stream = lex_stream_;
130 Ast *ast = HeaderParse();
131 parse_header_only = false;
132
133 if (ast)
134 {
135 compilation_unit = ast -> CompilationUnitCast();
136 if (compilation_unit &&
137 ! compilation_unit -> BadCompilationUnitCast())
138 {
139 if (compilation_unit -> NumTypeDeclarations() == 0)
140 compilation_unit -> MarkEmpty();
141 }
142 }
143
144 //
145 // If we succesfully parsed a compilation unit, allocate a storage pool
146 // for it. Subtract the amount of space that's already been allocated for
147 // the headers from the estimate for the bodies.
148 //
149 if (compilation_unit)
150 compilation_unit -> ast_pool = body_pool;
151 else delete body_pool;
152
153 delete list_node_pool; // free the pool of list nodes
154
155 return compilation_unit;
156 }
157
158
HeaderParse()159 Ast* Parser::HeaderParse()
160 {
161 TokenObject curtok = lex_stream -> Gettoken();
162 int act = START_STATE,
163 current_kind = lex_stream -> Kind(curtok);
164
165 /*****************************************************************/
166 /* Start parsing. */
167 /*****************************************************************/
168 state_stack_top = -1;
169
170 //
171 // Process a terminal
172 //
173 while (true)
174 {
175 if (++state_stack_top >= stack_length)
176 ReallocateStacks();
177
178 stack[state_stack_top] = act;
179 location_stack[state_stack_top] = Loc(curtok);
180
181 act = t_action(act, current_kind, lex_stream);
182
183 if (act <= NUM_RULES)
184 state_stack_top--; // make reduction look like a shift-reduce
185 else if (act > ERROR_ACTION)
186 {
187 curtok = lex_stream -> Gettoken();
188 current_kind = lex_stream -> Kind(curtok);
189
190 act -= ERROR_ACTION;
191 }
192 else if (act < ACCEPT_ACTION)
193 {
194 curtok = lex_stream -> Gettoken();
195 current_kind = lex_stream -> Kind(curtok);
196
197 continue;
198 }
199 else break;
200
201 //
202 // Process a non_terminal
203 //
204 do
205 {
206 state_stack_top -= (rhs[act] - 1);
207 (this ->* rule_action[act])();
208 act = nt_action(stack[state_stack_top], lhs[act]);
209 } while (act <= NUM_RULES);
210 } /* process_terminal */
211
212 if (act == ERROR_ACTION)
213 {
214 //
215 // If any error is found in a package declaration, do not try to
216 // repair it.
217 //
218 if (! parse_package_header_only)
219 RepairParse(curtok);
220
221 if (parse_stack[0] && parse_stack[0] -> CompilationUnitCast())
222 ((AstCompilationUnit*) parse_stack[0]) -> MarkBad();
223 else parse_stack[0] = NULL;
224 }
225
226 return parse_stack[0];
227 }
228
229
BodyParse(LexStream * lex_stream_,AstClassBody * class_body)230 bool Parser::BodyParse(LexStream* lex_stream_, AstClassBody* class_body)
231 {
232 assert(class_body -> UnparsedClassBodyCast());
233
234 lex_stream = lex_stream_;
235 ast_pool = class_body -> pool;
236 body_pool = class_body -> pool;
237 list_node_pool = new StoragePool(lex_stream_ -> NumTokens());
238 free_list_nodes = NULL;
239
240 bool success = Body(class_body);
241
242 delete list_node_pool; // free the pool of list nodes
243
244 class_body -> MarkParsed();
245
246 return success;
247 }
248
249
Body(AstClassBody * class_body)250 bool Parser::Body(AstClassBody* class_body)
251 {
252 bool errors_detected = false;
253 unsigned i;
254
255 for (i = 0; i < class_body -> NumConstructors(); i++)
256 {
257 AstConstructorDeclaration* constructor_decl =
258 class_body -> Constructor(i);
259
260 if (constructor_decl -> constructor_symbol)
261 {
262 AstMethodBody* block = constructor_decl -> constructor_body;
263 end_token = block -> right_brace_token; // last token in the body
264
265 AstMethodBody* new_body = ParseSegment(block -> left_brace_token);
266
267 if (! new_body)
268 errors_detected = true;
269 else constructor_decl -> constructor_body = new_body;
270 }
271 }
272
273 for (i = 0; i < class_body -> NumMethods(); i++)
274 {
275 AstMethodDeclaration* method_decl = class_body -> Method(i);
276 if (method_decl -> method_symbol && method_decl -> method_body_opt)
277 {
278 AstMethodBody* block = method_decl -> method_body_opt;
279 end_token = block -> right_brace_token;
280 AstMethodBody* new_block = ParseSegment(block -> left_brace_token);
281 if (! new_block) // a bad block ?
282 errors_detected = true;
283 else method_decl -> method_body_opt = new_block;
284 }
285 }
286
287 for (i = 0; i < class_body -> NumNestedClasses(); i++)
288 errors_detected = errors_detected ||
289 ! Body(class_body -> NestedClass(i) -> class_body);
290 for (i = 0; i < class_body -> NumNestedInterfaces(); i++)
291 errors_detected = errors_detected ||
292 ! Body(class_body -> NestedInterface(i) -> class_body);
293 return ! errors_detected;
294 }
295
296
InitializerParse(LexStream * stream,AstClassBody * class_body)297 bool Parser::InitializerParse(LexStream* stream, AstClassBody* class_body)
298 {
299 lex_stream = stream;
300 ast_pool = class_body -> pool;
301 body_pool = class_body -> pool;
302 list_node_pool = new StoragePool(stream -> NumTokens());
303 free_list_nodes = NULL;
304
305 bool success = Initializer(class_body);
306
307 delete list_node_pool; // free the pool of list nodes
308 return success;
309 }
310
311
Initializer(AstClassBody * class_body)312 bool Parser::Initializer(AstClassBody* class_body)
313 {
314 bool errors_detected = false;
315 unsigned i;
316
317 for (i = 0; i < class_body -> NumStaticInitializers(); i++)
318 {
319 AstMethodBody* block = class_body -> StaticInitializer(i) -> block;
320 end_token = block -> right_brace_token; // last token in the body
321 class_body -> StaticInitializer(i) -> block =
322 ParseSegment(block -> left_brace_token);
323 if (! class_body -> StaticInitializer(i) -> block)
324 {
325 errors_detected = true;
326 // Restore old empty block.
327 class_body -> StaticInitializer(i) -> block = block;
328 }
329 }
330 for (i = 0; i < class_body -> NumInstanceInitializers(); i++)
331 {
332 AstMethodBody* block = class_body -> InstanceInitializer(i) -> block;
333 end_token = block -> right_brace_token; // last token in the body
334 class_body -> InstanceInitializer(i) -> block =
335 ParseSegment(block -> left_brace_token);
336 if (! class_body -> InstanceInitializer(i) -> block)
337 {
338 errors_detected = true;
339 // Restore old empty block.
340 class_body -> InstanceInitializer(i) -> block = block;
341 }
342 }
343 for (i = 0; i < class_body -> NumNestedClasses(); i++)
344 errors_detected = errors_detected ||
345 ! Initializer(class_body -> NestedClass(i) -> class_body);
346 for (i = 0; i < class_body -> NumNestedInterfaces(); i++)
347 errors_detected = errors_detected ||
348 ! Initializer(class_body -> NestedInterface(i) -> class_body);
349 return ! errors_detected;
350 }
351
352
ParseSegment(TokenObject start_token)353 AstMethodBody* Parser::ParseSegment(TokenObject start_token)
354 {
355 //
356 // The next call to Gettoken will return the start_token.
357 // However, we initialize curtok to start_token in order
358 // to obtain a valid location for the BodyMarker.
359 //
360 lex_stream -> Reset(start_token);
361 TokenObject curtok = start_token; // get the location of the start_token
362 int act = START_STATE,
363 current_kind = TK_BodyMarker;
364
365 /*****************************************************************/
366 /* Start parsing. */
367 /*****************************************************************/
368 state_stack_top = -1;
369
370 //
371 // Process a terminal
372 //
373 while (true)
374 {
375 if (++state_stack_top >= stack_length)
376 ReallocateStacks();
377
378 stack[state_stack_top] = act;
379 location_stack[state_stack_top] = Loc(curtok);
380
381 act = t_action(act, current_kind, lex_stream);
382
383 if (act <= NUM_RULES)
384 state_stack_top--; // make reduction look like a shift-reduce
385 else if (act > ERROR_ACTION)
386 {
387 curtok = lex_stream -> Gettoken(end_token);
388 current_kind = lex_stream -> Kind(curtok);
389
390 act -= ERROR_ACTION;
391 }
392 else if (act < ACCEPT_ACTION)
393 {
394 curtok = lex_stream -> Gettoken(end_token);
395 current_kind = lex_stream -> Kind(curtok);
396
397 continue;
398 }
399 else break;
400
401 //
402 // Process a nonterminal
403 //
404 do
405 {
406 state_stack_top -= (rhs[act] - 1);
407 (this ->* rule_action[act])();
408 act = nt_action(stack[state_stack_top], lhs[act]);
409 } while (act <= NUM_RULES);
410 } /* process_terminal */
411
412 if (act == ERROR_ACTION)
413 {
414 RepairParse(curtok);
415
416 parse_stack[0] = NULL;
417 }
418
419 return DYNAMIC_CAST<AstMethodBody *> (parse_stack[0]);
420 }
421
422
RepairParse(TokenObject curtok)423 void Parser::RepairParse(TokenObject curtok)
424 {
425 //
426 // Repair an error
427 //
428 while (true)
429 {
430 //
431 // Pop state stack up to first state that had an
432 // action on the error token. The net effect is to
433 // remove all default reductions on an empty rule
434 // caused by the error token.
435 //
436 int k;
437 for (k = state_stack_top - 1;
438 k >= 0 && location_stack[k] == Loc(curtok); k--);
439 k++;
440
441 state_stack_top = k;
442
443 ErrorRepair(curtok);
444
445 curtok = lex_stream -> Gettoken(end_token);
446 int act = stack[state_stack_top--];
447 int current_kind = lex_stream -> Kind(curtok);
448
449 //
450 // Process a terminal
451 //
452 while (true)
453 {
454 if (++state_stack_top >= stack_length)
455 ReallocateStacks();
456
457 stack[state_stack_top] = act;
458 location_stack[state_stack_top] = Loc(curtok);
459
460 act = t_action(act, current_kind, lex_stream);
461
462 if (act <= NUM_RULES)
463 state_stack_top--; // make reduction look like a shift-reduce
464 else if (act > ERROR_ACTION)
465 {
466 curtok = lex_stream -> Gettoken(end_token);
467 current_kind = lex_stream -> Kind(curtok);
468
469 act -= ERROR_ACTION;
470 }
471 else if (act < ACCEPT_ACTION)
472 {
473 curtok = lex_stream -> Gettoken(end_token);
474 current_kind = lex_stream -> Kind(curtok);
475
476 continue;
477 }
478 //
479 // Since the null string is a valid Java program, this function
480 // will always succeed even if it has to delete the whole input.
481 //
482 else if (act == ACCEPT_ACTION)
483 return;
484 else
485 // loop around and keep trying until something is accepted.
486 break;
487
488 //
489 // Process a nonterminal
490 //
491 do
492 {
493 state_stack_top -= (rhs[act] - 1);
494 (this ->* rule_action[act])();
495 act = nt_action(stack[state_stack_top], lhs[act]);
496 } while (act <= NUM_RULES);
497 } /* process_terminal */
498 }
499 }
500
501
502 //
503 // This routine is invoked when an error is encountered in a "repair"
504 // parser. It will place the parser back into a working configuration
505 // by adjusting the state stack, the current token and the buffer.
506 //
ErrorRepair(TokenObject error_token)507 void Parser::ErrorRepair(TokenObject error_token)
508 {
509 SecondaryRepairInfo repair;
510
511 repair.code = ERROR_CODE;
512 do
513 {
514 repair.distance = 0;
515 repair.num_deletions = state_stack_top + BUFF_UBOUND;
516
517 buffer[1] = error_token;
518 buffer[0] = lex_stream -> Previous(buffer[1]);
519
520 for (int k = 2; k < BUFF_SIZE; k++)
521 buffer[k] = lex_stream -> Next(buffer[k - 1]);
522
523 int last_index;
524 for (last_index = MAX_DISTANCE - 1;
525 last_index >= 1 &&
526 lex_stream -> Kind(buffer[last_index]) == EOFT_SYMBOL;
527 last_index--);
528 last_index++;
529
530 repair = ErrorSurgery(stack, state_stack_top, last_index, repair);
531 error_token = buffer[MAX_DISTANCE - MIN_DISTANCE + 2];
532 } while (repair.code == 0);
533
534 state_stack_top = repair.stack_position;
535 lex_stream -> Reset(buffer[repair.buffer_position]);
536 }
537
538
539 //
540 // Keep cutting "stuff" out around the error until a stable configuration
541 // is found.
542 //
ErrorSurgery(int stck[],int stack_top,int last_index,SecondaryRepairInfo repair)543 SecondaryRepairInfo Parser::ErrorSurgery
544 (int stck[], int stack_top,
545 int last_index, SecondaryRepairInfo repair)
546 {
547 int stack_deletions = 0;
548 Location previous_loc = Loc(buffer[2]);
549
550 for (int top = stack_top;
551 top >= 0 && repair.num_deletions >= stack_deletions; top--)
552 {
553 if (location_stack[top] < previous_loc)
554 stack_deletions++;
555 previous_loc = location_stack[top];
556
557 for (int i = 1;
558 i <= last_index &&
559 repair.num_deletions >= (stack_deletions + i - 1); i++)
560 {
561 int j = ParseCheck(stck, top, lex_stream -> Kind(buffer[i]),
562 i + 1);
563
564 if ((j - i + 1) > MIN_DISTANCE)
565 {
566 int k = stack_deletions + i - 1;
567 if ((k < repair.num_deletions) ||
568 (j - k) > (repair.distance - repair.num_deletions))
569 {
570 repair.code = DELETION_CODE;
571 repair.distance = j;
572 repair.stack_position = top;
573 repair.buffer_position = i;
574 repair.num_deletions = k;
575 }
576 }
577 }
578 }
579
580 return repair;
581 }
582
583
584 /*****************************************************************/
585 /* Try to parse until first_token and all tokens in BUFFER have */
586 /* been consumed, or an error is encountered. Return the number */
587 /* of tokens that were expended before the parse blocked. */
588 /*****************************************************************/
ParseCheck(int stck[],int stack_top,int first_token,int buffer_position)589 int Parser::ParseCheck(int stck[], int stack_top, int first_token,
590 int buffer_position)
591 {
592 int max_pos,
593 indx,
594 ct,
595 act,
596 lhs_symbol;
597
598 /*****************************************************************/
599 /* Initialize pointer for temp_stack and initialize maximum */
600 /* position of state stack that is still useful. */
601 /*****************************************************************/
602 act = stck[stack_top];
603 if (first_token > NT_OFFSET)
604 {
605 temp_stack_top = stack_top;
606 max_pos = stack_top;
607 indx = buffer_position;
608 ct = lex_stream -> Kind(buffer[indx]);
609 lex_stream -> Reset(lex_stream -> Next(buffer[indx]));
610 lhs_symbol = first_token - NT_OFFSET;
611 act = nt_action(act, lhs_symbol);
612 if (act <= NUM_RULES)
613 goto process_non_terminal;
614 }
615 else
616 {
617 temp_stack_top = stack_top - 1;
618 max_pos = temp_stack_top;
619 indx = buffer_position - 1;
620 ct = first_token;
621 lex_stream -> Reset(buffer[buffer_position]);
622 }
623
624 process_terminal:
625 while (true)
626 {
627 if (++temp_stack_top >= stack_length) /* Stack overflow!!! */
628 return indx;
629 temp_stack[temp_stack_top] = act;
630
631 act = t_action(act, ct, lex_stream);
632
633 if (act <= NUM_RULES) /* reduce action */
634 temp_stack_top--;
635 else if (act < ACCEPT_ACTION || /* shift action */
636 act > ERROR_ACTION) /*shift-reduce action*/
637 {
638 if (indx == MAX_DISTANCE)
639 return indx;
640 indx++;
641 ct = lex_stream -> Kind(buffer[indx]);
642 lex_stream -> Reset(lex_stream -> Next(buffer[indx]));
643 if (act > ERROR_ACTION)
644 act -= ERROR_ACTION;
645 else goto process_terminal;
646 }
647 else if (act == ACCEPT_ACTION) /* accept action */
648 return MAX_DISTANCE;
649 else return indx; /* error action */
650
651 process_non_terminal:
652 do
653 {
654 temp_stack_top -= (rhs[act]-1);
655 lhs_symbol = lhs[act];
656 act = (temp_stack_top > max_pos
657 ? temp_stack[temp_stack_top]
658 : stck[temp_stack_top]);
659 act = nt_action(act, lhs_symbol);
660 } while (act <= NUM_RULES);
661
662 max_pos = Min(max_pos, temp_stack_top);
663 } // process_terminal;
664
665 return 0;
666 }
667
668 #ifdef HAVE_JIKES_NAMESPACE
669 } // Close namespace Jikes block
670 #endif
671
672