1 /*
2 -----------------------------------------------------------------------------
3 This source file is part of OGRE
4 (Object-oriented Graphics Rendering Engine)
5 For the latest info, see http://www.ogre3d.org/
6 
7 Copyright (c) 2000-2014 Torus Knot Software Ltd
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 -----------------------------------------------------------------------------
27 */
28 #include <assert.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include "Compiler2Pass.h"
33 
Compiler2Pass()34 Compiler2Pass::Compiler2Pass()
35 {
36     // reserve some memory space in the containers being used
37     mTokenInstructions.reserve(100);
38     mConstants.reserve(80);
39     // default contexts allows all contexts
40     // subclass should change it to fit the language being compiled
41     mActiveContexts = 0xffffffff;
42 
43 }
44 
45 
46 
InitSymbolTypeLib()47 void Compiler2Pass::InitSymbolTypeLib()
48 {
49     uint token_ID;
50     // find a default text for all Symbol Types in library
51 
52     // scan through all the rules and initialize TypeLib with index to text and index to rules for non-terminal tokens
53     for(int i = 0; i < mRulePathLibCnt; i++) {
54         token_ID = mRootRulePath[i].mTokenID;
55         // make sure SymbolTypeLib holds valid token
56         assert(mSymbolTypeLib[token_ID].mID == token_ID);
57         switch(mRootRulePath[i].mOperation) {
58             case otRULE:
59                 // if operation is a rule then update typelib
60                 mSymbolTypeLib[token_ID].mRuleID = i;
61 
62             case otAND:
63             case otOR:
64             case otOPTIONAL:
65                 // update text index in typelib
66                 if (mRootRulePath[i].mSymbol != NULL) mSymbolTypeLib[token_ID].mDefTextID = i;
67                 break;
68             case otREPEAT:
69             case otEND:
70                 break;
71         }
72     }
73 
74 }
75 
76 
compile(const char * source)77 bool Compiler2Pass::compile(const char* source)
78 {
79     bool Passed = false;
80 
81     mSource = source;
82     // start compiling if there is a rule base to work with
83     if(mRootRulePath != NULL) {
84          Passed = doPass1();
85 
86         if(Passed) {
87             Passed = doPass2();
88         }
89     }
90     return Passed;
91 }
92 
93 
doPass1()94 bool Compiler2Pass::doPass1()
95 {
96     // scan through Source string and build a token list using TokenInstructions
97     // this is a simple brute force lexical scanner/analyzer that also parses the formed
98     // token for proper semantics and context in one pass
99 
100     mCurrentLine = 1;
101     mCharPos = 0;
102     // reset position in Constants container
103     mConstants.clear();
104     mEndOfSource = strlen(mSource);
105 
106     // start with a clean slate
107     mTokenInstructions.clear();
108     // tokenize and check semantics until an error occurs or end of source is reached
109     // assume RootRulePath has pointer to rules so start at index + 1 for first rule path
110     // first rule token would be a rule definition so skip over it
111     bool passed = processRulePath(0);
112     // if a symbol in source still exists then the end of source was not reached and there was a problem some where
113     if (positionToNextSymbol()) passed = false;
114     return passed;
115 
116 }
117 
118 
processRulePath(uint rulepathIDX)119 bool Compiler2Pass::processRulePath( uint rulepathIDX)
120 {
121     // rule path determines what tokens and therefore what symbols are acceptable from the source
122     // it is assumed that the tokens with the longest similar symbols are arranged first so
123     // if a match is found it is accepted and no further searching is done
124 
125     // record position of last token in container
126     // to be used as the rollback position if a valid token is not found
127     uint TokenContainerOldSize = mTokenInstructions.size();
128     int OldCharPos = mCharPos;
129     int OldLinePos = mCurrentLine;
130     uint OldConstantsSize = mConstants.size();
131 
132     // keep track of what non-terminal token activated the rule
133     uint ActiveNTTRule = mRootRulePath[rulepathIDX].mTokenID;
134     // start rule path at next position for definition
135     rulepathIDX++;
136 
137     // assume the rule will pass
138     bool Passed = true;
139     bool EndFound = false;
140 
141     // keep following rulepath until the end is reached
142     while (EndFound == false) {
143         switch (mRootRulePath[rulepathIDX].mOperation) {
144 
145             case otAND:
146                 // only validate if the previous rule passed
147                 if(Passed) Passed = ValidateToken(rulepathIDX, ActiveNTTRule);
148                 break;
149 
150             case otOR:
151                 // only validate if the previous rule failed
152                 if ( Passed == false ) {
153                     // clear previous tokens from entry and try again
154                     mTokenInstructions.resize(TokenContainerOldSize);
155                     Passed = ValidateToken(rulepathIDX, ActiveNTTRule);
156                 }
157                 else { // path passed up to this point therefore finished so pretend end marker found
158                     EndFound = true;
159                 }
160                 break;
161 
162             case otOPTIONAL:
163                 // if previous passed then try this rule but it does not effect succes of rule since its optional
164                 if(Passed) ValidateToken(rulepathIDX, ActiveNTTRule);
165                 break;
166 
167             case otREPEAT:
168                 // repeat until no tokens of this type found
169                 // at least one must be found
170                 if(Passed) {
171                     int TokensPassed = 0;
172                     // keep calling until failure
173                     while ((Passed = ValidateToken(rulepathIDX, ActiveNTTRule))) {
174                         // increment count for previous passed token
175                         TokensPassed++;
176                     }
177                     // defaults to Passed = fail
178                     // if at least one token found then return passed = true
179                     if (TokensPassed > 0) Passed = true;
180                 }
181                 break;
182 
183             case otEND:
184                 // end of rule found so time to return
185                 EndFound = true;
186                 if(Passed == false) {
187                     // the rule did not validate so get rid of tokens decoded
188                     // roll back the token container end position to what it was when rule started
189                     // this will get rid of all tokens that had been pushed on the container while
190                     // trying to validating this rule
191                     mTokenInstructions.resize(TokenContainerOldSize);
192                     mConstants.resize(OldConstantsSize);
193                     mCharPos = OldCharPos;
194                     mCurrentLine = OldLinePos;
195                 }
196                 break;
197 
198             default:
199                 // an exception should be raised since the code should never get here
200                 Passed = false;
201                 EndFound = true;
202                 break;
203 
204         }
205 
206 
207         // move on to the next rule in the path
208         rulepathIDX++;
209     }
210 
211     return Passed;
212 
213 }
214 
215 
ValidateToken(const uint rulepathIDX,const uint activeRuleID)216 bool Compiler2Pass::ValidateToken(const uint rulepathIDX, const uint activeRuleID)
217 {
218     int tokenlength = 0;
219     // assume the test is going to fail
220     bool Passed = false;
221     uint TokenID = mRootRulePath[rulepathIDX].mTokenID;
222     // only validate token if context is correct
223     if (mSymbolTypeLib[TokenID].mContextKey & mActiveContexts) {
224 
225         // if terminal token then compare text of symbol with what is in source
226         if ( mSymbolTypeLib[TokenID].mRuleID == 0){
227 
228             if (positionToNextSymbol()) {
229                 // if Token is supposed to be a number then check if its a numerical constant
230                 if (TokenID == mValueID) {
231                     float constantvalue;
232                     if((Passed = isFloatValue(constantvalue, tokenlength))) {
233                         mConstants.push_back(constantvalue);
234                     }
235 
236                 }
237                 // compare token symbol text with source text
238                 else Passed = isSymbol(mRootRulePath[rulepathIDX].mSymbol, tokenlength);
239 
240                 if(Passed) {
241                     TokenInst newtoken;
242                     // push token onto end of container
243                     newtoken.mID = TokenID;
244                     newtoken.mNTTRuleID = activeRuleID;
245                     newtoken.mLine = mCurrentLine;
246                     newtoken.mPos = mCharPos;
247 
248                     mTokenInstructions.push_back(newtoken);
249                     // update source position
250                     mCharPos += tokenlength;
251 
252                     // allow token instruction to change the ActiveContexts
253                     // use token contexts pattern to clear ActiveContexts pattern bits
254                     mActiveContexts &= ~mSymbolTypeLib[TokenID].mContextPatternClear;
255                     // use token contexts pattern to set ActiveContexts pattern bits
256                     mActiveContexts |= mSymbolTypeLib[TokenID].mContextPatternSet;
257                 }
258             }
259 
260         }
261         // else a non terminal token was found
262         else {
263 
264             // execute rule for non-terminal
265             // get rule_ID for index into  rulepath to be called
266             Passed = processRulePath(mSymbolTypeLib[TokenID].mRuleID);
267         }
268     }
269 
270 
271     return Passed;
272 
273 }
274 
275 
getTypeDefText(const uint sid)276 const char* Compiler2Pass::getTypeDefText(const uint sid)
277 {
278     return mRootRulePath[mSymbolTypeLib[sid].mDefTextID].mSymbol;
279 }
280 
281 
isFloatValue(float & fvalue,int & charsize)282 bool Compiler2Pass::isFloatValue(float& fvalue, int& charsize)
283 {
284     // check to see if it is a numeric float value
285     bool valuefound = false;
286 
287     const char* startptr = mSource + mCharPos;
288     char* endptr = NULL;
289 
290     fvalue = (float)strtod(startptr, &endptr);
291     // if a valid float was found then endptr will have the pointer to the first invalid character
292     if(endptr) {
293         if(endptr>startptr) {
294             // a valid value was found so process it
295             charsize = endptr - startptr;
296             valuefound = true;
297         }
298     }
299 
300     return valuefound;
301 }
302 
303 
isSymbol(const char * symbol,int & symbolsize)304 bool Compiler2Pass::isSymbol(const char* symbol, int& symbolsize)
305 {
306     // compare text at source+charpos with the symbol : limit testing to symbolsize
307     bool symbolfound = false;
308     symbolsize = strlen(symbol);
309     if(strncmp(mSource + mCharPos, symbol, symbolsize)==0) {
310         symbolfound = true;
311     }
312 
313     return symbolfound;
314 }
315 
316 
positionToNextSymbol()317 bool Compiler2Pass::positionToNextSymbol()
318 {
319     bool validsymbolfound = false;
320     bool endofsource = false;
321     while(!validsymbolfound && !endofsource) {
322         skipWhiteSpace();
323         skipEOL();
324         skipComments();
325         // have we reached the end of the string?
326         if (mCharPos == mEndOfSource) endofsource = true;
327         else {
328             // if ASCII > space then assume valid character is found
329             if (mSource[mCharPos] > ' ') validsymbolfound = true;
330         }
331     }// end of while
332 
333     return validsymbolfound;
334 }
335 
336 
337 
skipComments()338 void Compiler2Pass::skipComments()
339 {
340   // if current char and next are // then search for EOL
341     if(mCharPos < mEndOfSource) {
342         if( ((mSource[mCharPos] == '/') && (mSource[mCharPos + 1] == '/')) ||
343             (mSource[mCharPos] == ';') ||
344             (mSource[mCharPos] == '#') ) findEOL();
345     }
346 }
347 
348 
findEOL()349 void Compiler2Pass::findEOL()
350 {
351     // find eol charter and move to this position
352     const char* newpos = strchr(&mSource[mCharPos], '\n');
353     if(newpos) {
354         mCharPos += newpos - &mSource[mCharPos];
355     }
356     // couldn't find end of line so skip to the end
357     else mCharPos = mEndOfSource - 1;
358 
359 }
360 
361 
skipEOL()362 void Compiler2Pass::skipEOL()
363 {
364     if ((mSource[mCharPos] == '\n') || (mSource[mCharPos] == '\r')) {
365         mCurrentLine++;
366         mCharPos++;
367         if ((mSource[mCharPos] == '\n') || (mSource[mCharPos] == '\r')) {
368             mCharPos++;
369         }
370     }
371 }
372 
373 
skipWhiteSpace()374 void Compiler2Pass::skipWhiteSpace()
375 {
376     // FIX - this method kinda slow
377     while((mSource[mCharPos] == ' ') || (mSource[mCharPos] == '\t')) mCharPos++; // find first non white space character
378 }
379 
380