1 #include "stdafx.h"
2 #include "Parser.h"
3 #include "ExpressionParser.h"
4 #include "Core/Misc.h"
5 #include "Commands/CommandSequence.h"
6 #include "Commands/CAssemblerLabel.h"
7 #include "Core/Common.h"
8 #include "Util/Util.h"
9
// Returns true if value is exactly equal to one of the strings in terminators,
// false otherwise (including when terminators is empty).
inline bool isPartOfList(const std::wstring& value, const std::initializer_list<const wchar_t*>& terminators)
{
	auto candidate = terminators.begin();
	const auto last = terminators.end();

	while (candidate != last)
	{
		if (value.compare(*candidate) == 0)
			return true;

		++candidate;
	}

	return false;
}
20
Parser()21 Parser::Parser()
22 {
23 initializingMacro = false;
24 overrideFileInfo = false;
25 conditionStack.push_back({true,false});
26 clearError();
27 }
28
pushConditionalResult(ConditionalResult cond)29 void Parser::pushConditionalResult(ConditionalResult cond)
30 {
31 ConditionInfo info = conditionStack.back();
32 info.inTrueBlock = info.inTrueBlock && cond != ConditionalResult::False;
33 info.inUnknownBlock = info.inUnknownBlock || cond == ConditionalResult::Unknown;
34 conditionStack.push_back(info);
35 }
36
parseExpression()37 Expression Parser::parseExpression()
38 {
39 return ::parseExpression(*getTokenizer(), !isInsideTrueBlock() || isInsideUnknownBlock());
40 }
41
parseExpressionList(std::vector<Expression> & list,int min,int max)42 bool Parser::parseExpressionList(std::vector<Expression>& list, int min, int max)
43 {
44 bool valid = true;
45 list.clear();
46 list.reserve(max >= 0 ? max : 32);
47
48 const Token& start = peekToken();
49
50 Expression exp = parseExpression();
51 list.push_back(exp);
52
53 if (exp.isLoaded() == false)
54 {
55 printError(start,L"Parameter failure");
56 getTokenizer()->skipLookahead();
57 valid = false;
58 }
59
60 while (peekToken().type == TokenType::Comma)
61 {
62 eatToken();
63
64 exp = parseExpression();
65 list.push_back(exp);
66
67 if (exp.isLoaded() == false)
68 {
69 printError(start,L"Parameter failure");
70 getTokenizer()->skipLookahead();
71 valid = false;
72 }
73 }
74
75 if (list.size() < (size_t) min)
76 {
77 printError(start,L"Not enough parameters (min %d)",min);
78 return false;
79 }
80
81 if (max != -1 && (size_t) max < list.size())
82 {
83 printError(start,L"Too many parameters (max %d)",max);
84 return false;
85 }
86
87 return valid;
88 }
89
parseIdentifier(std::wstring & dest)90 bool Parser::parseIdentifier(std::wstring& dest)
91 {
92 const Token& tok = nextToken();
93 if (tok.type != TokenType::Identifier)
94 return false;
95
96 dest = tok.getStringValue();
97 return true;
98 }
99
parseCommandSequence(wchar_t indicator,const std::initializer_list<const wchar_t * > terminators)100 std::unique_ptr<CAssemblerCommand> Parser::parseCommandSequence(wchar_t indicator, const std::initializer_list<const wchar_t*> terminators)
101 {
102 auto sequence = make_unique<CommandSequence>();
103
104 bool foundTermination = false;
105 while (atEnd() == false)
106 {
107 const Token &next = peekToken();
108
109 if(next.type == TokenType::Separator)
110 {
111 eatToken();
112 continue;
113 }
114
115 if (next.stringValueStartsWith(indicator) && isPartOfList(next.getStringValue(), terminators))
116 {
117 foundTermination = true;
118 break;
119 }
120
121 bool foundSomething = false;
122 while (checkEquLabel() || checkMacroDefinition())
123 {
124 // do nothing, just parse all the equs and macros there are
125 if (hasError())
126 sequence->addCommand(handleError());
127
128 foundSomething = true;
129 }
130
131 if (foundSomething)
132 continue;
133
134 std::unique_ptr<CAssemblerCommand> cmd = parseCommand();
135
136 // omit commands inside blocks that are trivially false
137 if (isInsideTrueBlock() == false)
138 {
139 continue;
140 }
141
142 sequence->addCommand(std::move(cmd));
143 }
144
145 if (!foundTermination && terminators.size())
146 {
147 std::wstring expected;
148 for (const wchar_t* terminator : terminators)
149 {
150 if (!expected.empty())
151 expected += L", ";
152 expected += terminator;
153 }
154
155 Logger::printError(Logger::Error, L"Unterminated command sequence, expected any of %s.", expected);
156 }
157
158 return std::move(sequence);
159 }
160
parseFile(TextFile & file,bool virtualFile)161 std::unique_ptr<CAssemblerCommand> Parser::parseFile(TextFile& file, bool virtualFile)
162 {
163 FileTokenizer tokenizer;
164 if (tokenizer.init(&file) == false)
165 return nullptr;
166
167 std::unique_ptr<CAssemblerCommand> result = parse(&tokenizer,virtualFile,file.getFileName());
168
169 if (file.isFromMemory() == false)
170 Global.FileInfo.TotalLineCount += file.getNumLines();
171
172 return result;
173 }
174
parseString(const std::wstring & text)175 std::unique_ptr<CAssemblerCommand> Parser::parseString(const std::wstring& text)
176 {
177 TextFile file;
178 file.openMemory(text);
179 return parseFile(file,true);
180 }
181
parseTemplate(const std::wstring & text,std::initializer_list<AssemblyTemplateArgument> variables)182 std::unique_ptr<CAssemblerCommand> Parser::parseTemplate(const std::wstring& text, std::initializer_list<AssemblyTemplateArgument> variables)
183 {
184 std::wstring fullText = text;
185
186 overrideFileInfo = true;
187 overrideFileNum = Global.FileInfo.FileNum;
188 overrideLineNum = Global.FileInfo.LineNumber;
189
190 for (auto& arg: variables)
191 {
192 size_t count = replaceAll(fullText,arg.variableName,arg.value);
193
194 #ifdef _DEBUG
195 if (count != 0 && arg.value.empty())
196 Logger::printError(Logger::Warning,L"Empty replacement for %s",arg.variableName);
197 #endif
198 }
199
200 std::unique_ptr<CAssemblerCommand> result = parseString(fullText);
201 overrideFileInfo = false;
202
203 return result;
204 }
205
parseDirective(const DirectiveMap & directiveSet)206 std::unique_ptr<CAssemblerCommand> Parser::parseDirective(const DirectiveMap &directiveSet)
207 {
208 const Token &tok = peekToken();
209 if (tok.type != TokenType::Identifier)
210 return nullptr;
211
212 const std::wstring stringValue = tok.getStringValue();
213
214 auto matchRange = directiveSet.equal_range(stringValue);
215 for (auto it = matchRange.first; it != matchRange.second; ++it)
216 {
217 const DirectiveEntry &directive = it->second;
218
219 if (directive.flags & DIRECTIVE_DISABLED)
220 continue;
221 if ((directive.flags & DIRECTIVE_NOCASHOFF) && Global.nocash == true)
222 continue;
223 if ((directive.flags & DIRECTIVE_NOCASHON) && Global.nocash == false)
224 continue;
225 if ((directive.flags & DIRECTIVE_NOTINMEMORY) && Global.memoryMode == true)
226 continue;
227
228 if (directive.flags & DIRECTIVE_MIPSRESETDELAY)
229 Arch->NextSection();
230
231 eatToken();
232 std::unique_ptr<CAssemblerCommand> result = directive.function(*this,directive.flags);
233 if (result == nullptr)
234 {
235 if (hasError() == false)
236 printError(tok,L"Directive parameter failure");
237 return nullptr;
238 } else if (!(directive.flags & DIRECTIVE_MANUALSEPARATOR) && nextToken().type != TokenType::Separator)
239 {
240 printError(tok,L"Directive not terminated");
241 return nullptr;
242 }
243
244 return result;
245 }
246
247 return nullptr;
248 }
249
matchToken(TokenType type,bool optional)250 bool Parser::matchToken(TokenType type, bool optional)
251 {
252 if (optional)
253 {
254 const Token& token = peekToken();
255 if (token.type == type)
256 eatToken();
257 return true;
258 }
259
260 return nextToken().type == type;
261 }
262
parse(Tokenizer * tokenizer,bool virtualFile,const std::wstring & name)263 std::unique_ptr<CAssemblerCommand> Parser::parse(Tokenizer* tokenizer, bool virtualFile, const std::wstring& name)
264 {
265 if (entries.size() >= 150)
266 {
267 Logger::queueError(Logger::Error, L"Max include/recursion depth reached");
268 return nullptr;
269 }
270
271 FileEntry entry;
272 entry.tokenizer = tokenizer;
273 entry.virtualFile = virtualFile;
274
275 if (virtualFile == false && name.empty() == false)
276 {
277 entry.fileNum = (int) Global.FileInfo.FileList.size();
278 Global.FileInfo.FileList.push_back(name);
279 } else {
280 entry.fileNum = -1;
281 }
282
283 entries.push_back(entry);
284
285 std::unique_ptr<CAssemblerCommand> sequence = parseCommandSequence();
286 entries.pop_back();
287
288 return sequence;
289 }
290
addEquation(const Token & startToken,const std::wstring & name,const std::wstring & value)291 void Parser::addEquation(const Token& startToken, const std::wstring& name, const std::wstring& value)
292 {
293 // parse value string
294 TextFile f;
295 f.openMemory(value);
296
297 FileTokenizer tok;
298 tok.init(&f);
299
300 TokenizerPosition start = tok.getPosition();
301 while (tok.atEnd() == false && tok.peekToken().type != TokenType::Separator)
302 {
303 const Token& token = tok.nextToken();
304 if (token.type == TokenType::Identifier && token.getStringValue() == name)
305 {
306 printError(startToken,L"Recursive equ definition for \"%s\" not allowed",name);
307 return;
308 }
309
310 if (token.type == TokenType::Equ)
311 {
312 printError(startToken,L"equ value must not contain another equ instance");
313 return;
314 }
315 }
316
317 // extract tokens
318 TokenizerPosition end = tok.getPosition();
319 std::vector<Token> tokens = tok.getTokens(start, end);
320 size_t index = Tokenizer::addEquValue(tokens);
321
322 for (FileEntry& entry : entries)
323 entry.tokenizer->resetLookaheadCheckMarks();
324
325 // register equation
326 Global.symbolTable.addEquation(name, Global.FileInfo.FileNum, Global.Section, index);
327 }
328
checkEquLabel()329 bool Parser::checkEquLabel()
330 {
331 updateFileInfo();
332
333 const Token& start = peekToken();
334 if (start.type == TokenType::Identifier)
335 {
336 int pos = 1;
337 if (peekToken(pos).type == TokenType::Colon)
338 pos++;
339
340 if (peekToken(pos).type == TokenType::Equ &&
341 peekToken(pos+1).type == TokenType::EquValue)
342 {
343 std::wstring name = peekToken(0).getStringValue();
344 std::wstring value = peekToken(pos+1).getStringValue();
345 eatTokens(pos+2);
346
347 // skip the equ if it's inside a false conditional block
348 if (isInsideTrueBlock() == false)
349 return true;
350
351 // equs can't be inside blocks whose condition can only be
352 // evaluated during validation
353 if (isInsideUnknownBlock())
354 {
355 printError(start,L"equ not allowed inside of block with non-trivial condition");
356 return true;
357 }
358
359 // equs are not allowed in macros
360 if (initializingMacro)
361 {
362 printError(start,L"equ not allowed in macro");
363 return true;
364 }
365
366 if (Global.symbolTable.isValidSymbolName(name) == false)
367 {
368 printError(start,L"Invalid equation name %s",name);
369 return true;
370 }
371
372 if (Global.symbolTable.symbolExists(name,Global.FileInfo.FileNum,Global.Section))
373 {
374 printError(start,L"Equation name %s already defined",name);
375 return true;
376 }
377
378 addEquation(start,name,value);
379 return true;
380 }
381 }
382
383 return false;
384 }
385
checkMacroDefinition()386 bool Parser::checkMacroDefinition()
387 {
388 const Token& first = peekToken();
389 if (first.type != TokenType::Identifier)
390 return false;
391
392 if (!first.stringValueStartsWith(L'.') || first.getStringValue() != L".macro")
393 return false;
394
395 eatToken();
396
397 // nested macro definitions are not allowed
398 if (initializingMacro)
399 {
400 printError(first,L"Nested macro definitions not allowed");
401 while (!atEnd())
402 {
403 const Token& token = nextToken();
404 if (token.type == TokenType::Identifier && token.getStringValue() == L".endmacro")
405 break;
406 }
407
408 return true;
409 }
410
411 std::vector<Expression> parameters;
412 if (parseExpressionList(parameters,1,-1) == false)
413 return false;
414
415 ParserMacro macro;
416 macro.counter = 0;
417
418 // load name
419 if (parameters[0].evaluateIdentifier(macro.name) == false)
420 return false;
421
422 // load parameters
423 for (size_t i = 1; i < parameters.size(); i++)
424 {
425 std::wstring name;
426 if (parameters[i].evaluateIdentifier(name) == false)
427 return false;
428
429 macro.parameters.push_back(name);
430 }
431
432 if(nextToken().type != TokenType::Separator)
433 {
434 printError(first,L"Macro directive not terminated");
435 return false;
436 }
437
438 // load macro content
439
440 TokenizerPosition start = getTokenizer()->getPosition();
441 bool valid = false;
442 while (atEnd() == false)
443 {
444 const Token& tok = nextToken();
445 if (tok.type == TokenType::Identifier && tok.getStringValue() == L".endmacro")
446 {
447 valid = true;
448 break;
449 }
450 }
451
452 // Macros have to be defined at parse time, so they can't be defined in blocks
453 // with non-trivial conditions
454 if (isInsideUnknownBlock())
455 {
456 printError(first, L"Macro definition not allowed inside of block with non-trivial condition");
457 return false;
458 }
459
460 // if we are in a known false block, don't define the macro
461 if (!isInsideTrueBlock())
462 return true;
463
464 // duplicate check
465 if (macros.find(macro.name) != macros.end())
466 {
467 printError(first, L"Macro \"%s\" already defined", macro.name);
468 return false;
469 }
470
471 // no .endmacro, not valid
472 if (valid == false)
473 {
474 printError(first, L"Macro \"%s\" not terminated", macro.name);
475 return true;
476 }
477
478 // get content
479 TokenizerPosition end = getTokenizer()->getPosition().previous();
480 macro.content = getTokenizer()->getTokens(start,end);
481
482 if(nextToken().type != TokenType::Separator)
483 {
484 printError(first,L"Endmacro directive not terminated");
485 return false;
486 }
487
488 macros[macro.name] = macro;
489 return true;
490 }
491
parseMacroCall()492 std::unique_ptr<CAssemblerCommand> Parser::parseMacroCall()
493 {
494 const Token& start = peekToken();
495 if (start.type != TokenType::Identifier)
496 return nullptr;
497
498 auto it = macros.find(start.getStringValue());
499 if (it == macros.end())
500 return nullptr;
501
502 ParserMacro& macro = it->second;
503 eatToken();
504
505 // create a token stream for the macro content,
506 // registering replacements for parameter values
507 TokenStreamTokenizer macroTokenizer;
508
509 std::set<std::wstring> identifierParameters;
510 for (size_t i = 0; i < macro.parameters.size(); i++)
511 {
512 if (peekToken().type == TokenType::Separator)
513 {
514 printError(start,L"Too few macro arguments (%d vs %d)",i,macro.parameters.size());
515 return nullptr;
516 }
517
518 if (i != 0)
519 {
520 if (nextToken().type != TokenType::Comma)
521 {
522 printError(start,L"Macro arguments not comma-separated");
523 return nullptr;
524 }
525 }
526
527 TokenizerPosition startPos = getTokenizer()->getPosition();
528 Expression exp = parseExpression();
529 if (exp.isLoaded() == false)
530 {
531 printError(start,L"Invalid macro argument expression");
532 return nullptr;
533 }
534
535 TokenizerPosition endPos = getTokenizer()->getPosition();
536 std::vector<Token> tokens = getTokenizer()->getTokens(startPos,endPos);
537
538 // remember any single identifier parameters for the label replacement
539 if (tokens.size() == 1 && tokens[0].type == TokenType::Identifier)
540 identifierParameters.insert(tokens[0].getStringValue());
541
542 // give them as a replacement to new tokenizer
543 macroTokenizer.registerReplacement(macro.parameters[i],tokens);
544 }
545
546 if (peekToken().type == TokenType::Comma)
547 {
548 size_t count = macro.parameters.size();
549 while (peekToken().type == TokenType::Comma)
550 {
551 eatToken();
552 parseExpression();
553 count++;
554 }
555
556 printError(start,L"Too many macro arguments (%d vs %d)",count,macro.parameters.size());
557 return nullptr;
558 }
559
560 if(nextToken().type != TokenType::Separator)
561 {
562 printError(start,L"Macro call not terminated");
563 return nullptr;
564 }
565
566 // skip macro instantiation in known false blocks
567 if (!isInsideUnknownBlock() && !isInsideTrueBlock())
568 return make_unique<DummyCommand>();
569
570 // a macro is fully parsed once when it's loaded
571 // to gather all labels. it's not necessary to
572 // instantiate other macros at that time
573 if (initializingMacro)
574 return make_unique<DummyCommand>();
575
576 // the first time a macro is instantiated, it needs to be analyzed
577 // for labels
578 if (macro.counter == 0)
579 {
580 initializingMacro = true;
581
582 // parse the short lived next command
583 macroTokenizer.init(macro.content);
584 Logger::suppressErrors();
585 std::unique_ptr<CAssemblerCommand> command = parse(¯oTokenizer,true);
586 Logger::unsuppressErrors();
587
588 macro.labels = macroLabels;
589 macroLabels.clear();
590
591 initializingMacro = false;
592 }
593
594 // register labels and replacements
595 for (const std::wstring& label: macro.labels)
596 {
597 // check if the label is using the name of a parameter
598 // in that case, don't register a unique replacement
599 if (identifierParameters.find(label) != identifierParameters.end())
600 continue;
601
602 // otherwise make sure the name is unique
603 std::wstring fullName;
604 if (Global.symbolTable.isLocalSymbol(label))
605 fullName = formatString(L"@@%s_%s_%08X",macro.name,label.substr(2),macro.counter);
606 else if (Global.symbolTable.isStaticSymbol(label))
607 fullName = formatString(L"@%s_%s_%08X",macro.name,label.substr(1),macro.counter);
608 else
609 fullName = formatString(L"%s_%s_%08X",macro.name,label,macro.counter);
610
611 macroTokenizer.registerReplacement(label,fullName);
612 }
613
614 macroTokenizer.init(macro.content);
615 macro.counter++;
616
617 return parse(¯oTokenizer,true);
618
619 }
620
parseLabel()621 std::unique_ptr<CAssemblerCommand> Parser::parseLabel()
622 {
623 updateFileInfo();
624
625 const Token& start = peekToken(0);
626
627 if (peekToken(0).type == TokenType::Identifier &&
628 peekToken(1).type == TokenType::Colon)
629 {
630 const std::wstring name = start.getStringValue();
631 eatTokens(2);
632
633 if (initializingMacro)
634 macroLabels.insert(name);
635
636 if (Global.symbolTable.isValidSymbolName(name) == false)
637 {
638 printError(start,L"Invalid label name");
639 return nullptr;
640 }
641
642 return ::make_unique<CAssemblerLabel>(name,start.getOriginalText());
643 }
644
645 return nullptr;
646 }
647
handleError()648 std::unique_ptr<CAssemblerCommand> Parser::handleError()
649 {
650 // skip the rest of the statement
651 while (!atEnd() && nextToken().type != TokenType::Separator);
652
653 clearError();
654 return make_unique<InvalidCommand>();
655 }
656
657
updateFileInfo()658 void Parser::updateFileInfo()
659 {
660 if (overrideFileInfo)
661 {
662 Global.FileInfo.FileNum = overrideFileNum;
663 Global.FileInfo.LineNumber = overrideLineNum;
664 return;
665 }
666
667 for (size_t i = entries.size(); i > 0; i--)
668 {
669 size_t index = i-1;
670
671 if (entries[index].virtualFile == false && entries[index].fileNum != -1)
672 {
673 Global.FileInfo.FileNum = entries[index].fileNum;
674
675 // if it's not the topmost file, then the command to instantiate the
676 // following files was already parsed -> take the previous command's line
677 if (index != entries.size() - 1)
678 Global.FileInfo.LineNumber = entries[index].previousCommandLine;
679 else
680 {
681 Global.FileInfo.LineNumber = (int)entries[index].tokenizer->peekToken().line;
682 entries[index].previousCommandLine = Global.FileInfo.LineNumber;
683 }
684 return;
685 }
686 }
687 }
688
parseCommand()689 std::unique_ptr<CAssemblerCommand> Parser::parseCommand()
690 {
691 std::unique_ptr<CAssemblerCommand> command;
692
693 updateFileInfo();
694
695 if (atEnd())
696 return make_unique<DummyCommand>();
697
698 if ((command = parseLabel()) != nullptr)
699 return command;
700 if (hasError())
701 return handleError();
702
703 if ((command = parseMacroCall()) != nullptr)
704 return command;
705 if (hasError())
706 return handleError();
707
708 if ((command = Arch->parseDirective(*this)) != nullptr)
709 return command;
710 if (hasError())
711 return handleError();
712
713 if ((command = parseDirective(directives)) != nullptr)
714 return command;
715 if (hasError())
716 return handleError();
717
718 if ((command = Arch->parseOpcode(*this)) != nullptr)
719 return command;
720 if (hasError())
721 return handleError();
722
723 const Token& token = peekToken();
724 printError(token,L"Parse error '%s'",token.getOriginalText());
725 return handleError();
726 }
727
addEntry(int result,TokenSequence tokens,TokenValueSequence values)728 void TokenSequenceParser::addEntry(int result, TokenSequence tokens, TokenValueSequence values)
729 {
730 Entry entry = { tokens, values, result };
731 entries.push_back(entry);
732 }
733
parse(Parser & parser,int & result)734 bool TokenSequenceParser::parse(Parser& parser, int& result)
735 {
736 for (Entry& entry: entries)
737 {
738 TokenizerPosition pos = parser.getTokenizer()->getPosition();
739 auto values = entry.values.begin();
740
741 bool valid = true;
742 for (TokenType type: entry.tokens)
743 {
744 // check of token type matches
745 const Token& token = parser.nextToken();
746 if (token.type != type)
747 {
748 valid = false;
749 break;
750 }
751
752 // if necessary, check if the value of the token also matches
753 if (type == TokenType::Identifier)
754 {
755 if (values == entry.values.end() || values->textValue != token.getStringValue())
756 {
757 valid = false;
758 break;
759 }
760
761 values++;
762 } else if (type == TokenType::Integer)
763 {
764 if (values == entry.values.end() || values->intValue != token.intValue)
765 {
766 valid = false;
767 break;
768 }
769
770 values++;
771 }
772 }
773
774 if (valid && values == entry.values.end())
775 {
776 result = entry.result;
777 return true;
778 }
779
780 parser.getTokenizer()->setPosition(pos);
781 }
782
783 return false;
784 }
785