1 // Copyright 2009 The Archiveopteryx Developers <info@aox.org>
2
3 #include "parser.h"
4
5
6 /*! \class Parser parser.h
7
8 The Parser class does basic C++ parsing.
9
10 It doesn't actually parse C++: all it does is lend some support to
11 the header and source handling, which needs to find certain
12 constructs and look at them.
13 */
14
15
16 /*! Constructs a Parser for string \a s. The parser's cursor is left
17 at the beginning of \a s. */
18
Parser(const EString & s)19 Parser::Parser( const EString & s )
20 : t( s ), i( 0 ), ln( 1 ), li( 0 )
21 {
22 // nothing necessary
23 }
24
25
26 /*! Returns true if the parser has reached the end of its input, and
27 false if not.
28 */
29
atEnd() const30 bool Parser::atEnd() const
31 {
32 return i >= t.length();
33 }
34
35
36 /*! Returns the parser's current line number.
37
38 The line number is that of the first unparsed nonwhitespace
39 character. This implies that if the parser's cursor is at the end of a
40 line, then the line number returned is that of the next nonempty line.
41 */
42
line()43 uint Parser::line()
44 {
45 if ( li > i ) {
46 ln = 1;
47 li = 0;
48 }
49 while ( li < i ||
50 ( t[li] == 32 || t[li] == 9 || t[li] == 13 || t[li] == 10 ) ) {
51 if ( t[li] == 10 )
52 ln++;
53 li++;
54 }
55 return ln;
56 }
57
58
59 /*! Scans forward until an instance of \a text is found, and positions
60 the cursor at the first character after that string. */
61
scan(const EString & text)62 void Parser::scan( const EString & text )
63 {
64 uint j = 0;
65 while ( i < t.length() && j < text.length() ) {
66 j = 0;
67 while ( j < text.length() && t[i+j] == text[j] )
68 j++;
69 if ( j < text.length() )
70 i++;
71 }
72 if ( j == text.length() )
73 i += j;
74 }
75
76
77 /*! Scans for \a text and returns all the text, without the trailing
78 instance of \a text. The cursor is left after \a text. */
79
textUntil(const EString & text)80 EString Parser::textUntil( const EString & text )
81 {
82 uint j = i;
83 scan( text );
84 if ( atEnd() )
85 return t.mid( j, i-j );
86 return t.mid( j, i-j-text.length() );
87 }
88
89
90 /*! Scans past whitespace, leaving the cursor at the end or at a
91 nonwhitespace character.
92 */
93
whitespace()94 void Parser::whitespace()
95 {
96 i = whitespace( i );
97 }
98
99
// Returns a copy of t from which every space, tab, CR and LF has been
// removed. All other characters are kept, in their original order.

static EString spaceless( const EString & t )
{
    EString r;
    for ( uint pos = 0; pos < t.length(); pos++ ) {
        bool blank = ( t[pos] == ' ' || t[pos] == '\t' ||
                       t[pos] == '\r' || t[pos] == '\n' );
        if ( !blank )
            r.append( t[pos] );
    }
    return r;
}
111
112
113 /*! Returns the C++ identifier at the cursor, or an empty string if
114 there isn't any. Steps past the identifier and any trailing whitespace.
115 */
116
identifier()117 EString Parser::identifier()
118 {
119 int j = complexIdentifier( i );
120 EString r = spaceless( t.mid( i, j - i ) );
121 i = j;
122 return r;
123 }
124
125
126 /*! Scans past the simpler identifier starting at \a j, returning the
127 first position afte the identifier. If something goes wrong,
128 simpleIdentifier() returns \a j.
129
130 A simple identifier is a text label not containing ::, <, >,
131 whitespace or the like.
132 */
133
simpleIdentifier(uint j)134 uint Parser::simpleIdentifier( uint j )
135 {
136 uint k = whitespace( j );
137 if ( t.mid( k, 8 ) == "operator" )
138 return operatorHack( k );
139 if ( ( t[k] >= 'A' && t[k] <= 'z' ) ||
140 ( t[k] >= 'a' && t[k] <= 'z' ) ) {
141 j = k + 1;
142 while ( ( t[j] >= 'A' && t[j] <= 'z' ) ||
143 ( t[j] >= 'a' && t[j] <= 'z' ) ||
144 ( t[j] >= '0' && t[j] <= '9' ) ||
145 ( t[j] == '_' ) )
146 j++;
147 }
148 return j;
149 }
150
151
152 /*! Scans past the complex identifier starting at \a j, returning the
153 first position after the identifier. If something goes wrong,
154 complexIdentifier() returns \a j.
155
156 A complex identifier is anything that may be used as an identifier
157 in C++, even "operator const char *".
158 */
159
complexIdentifier(uint j)160 uint Parser::complexIdentifier( uint j )
161 {
162 uint k = whitespace( j );
163 if ( t[k] == ':' && t[k+1] == ':' )
164 k = whitespace( k + 2 );
165 uint l = simpleIdentifier( k );
166 if ( l == k )
167 return j;
168 j = whitespace( l );
169
170 while ( t[j] == ':' && t[j+1] == ':' ) {
171 if ( t.mid( j+2, 8 ) == "operator" )
172 j = operatorHack( j+2 );
173 else if ( t[j+2] == '~' )
174 j = simpleIdentifier( j + 3 );
175 else
176 j = simpleIdentifier( j + 2 );
177 }
178
179 j = whitespace( j );
180 if ( t[j] == '<' ) {
181 k = complexIdentifier( j + 1 );
182 if ( k > j + 1 && t[k] == '>' )
183 j = k+1;
184 }
185 return j;
186 }
187
188
189 /*! Parses a type name starting at \a j and returns the first
190 character after the type name (and after trailing whitespace). If
191 a type name can't be parsed, \a j is returned.
192 */
193
type(uint j)194 uint Parser::type( uint j )
195 {
196 // first, we have zero or more of const, static etc.
197 uint l = j;
198 uint k;
199 do {
200 k = l;
201 l = whitespace( k );
202 while ( t[l] >= 'a' && t[l] <= 'z' )
203 l++;
204 EString modifier = t.mid( k, l-k ).simplified();
205 if ( !( modifier == "const" ||
206 modifier == "inline" ||
207 modifier == "unsigned" ||
208 modifier == "signed" ||
209 modifier == "class" ||
210 modifier == "struct" ||
211 modifier == "virtual" ||
212 modifier == "static" ) )
213 l = k;
214 } while ( l > k );
215
216 l = complexIdentifier( k );
217 if ( l == k )
218 return j;
219
220 k = whitespace( l );
221 if ( t[k] == ':' && t[k+1] == ':' ) {
222 l = whitespace( simpleIdentifier( k+2 ) );
223 if ( l == k )
224 return j;
225 k = l;
226 }
227
228 if ( t[k] == '&' || t[k] == '*' )
229 k = whitespace( k + 1 );
230 return k;
231 }
232
233
234 /*! Parses a type specifier and returns it as a string. If the cursor
235 doesn't point to one, type() returns an empty string.
236
237 */
238
type()239 EString Parser::type()
240 {
241 uint j = type( i );
242 EString r = t.mid( i, j-i ).simplified(); // simplified() is not quite right
243 i = j;
244 while ( r.startsWith( "class " ) )
245 r = r.mid( 6 );
246 r.replace( " class ", " " );
247 return r;
248 }
249
250
251 /*! Parses an argument list (for a particularly misleading meaning of
252 parse) and returns it. The cursor must be on the leading '(', it
253 will be left immediately after the trailing ')'.
254
255 The argument list is returned including parentheses. In case of an
256 error, an empty string is returned and the cursor is left near the
257 error.
258 */
259
argumentList()260 EString Parser::argumentList()
261 {
262 EString r;
263 uint j = whitespace( i );
264 if ( t[j] != '(' )
265 return r;
266 r = "( ";
267 i = whitespace( j + 1 );
268 if ( t[i] == ')' ) {
269 i++;
270 return "()";
271 }
272 EString s = "";
273 bool more = true;
274 while ( more ) {
275 EString tp = type();
276 if ( tp.isEmpty() )
277 return ""; // error message here?
278 whitespace();
279 j = simpleIdentifier( i );
280 if ( j > i ) { // there is a variable name
281 tp = tp + " " + t.mid( i, j-i ).simplified();
282 i = j;
283 }
284 r = r + s + tp;
285 whitespace();
286 if ( t[i] == '=' ) { // there is a default value...
287 while ( i < t.length() && t[i] != ',' && t[i] != ')' )
288 i++;
289 whitespace();
290 }
291 else if ( t[i] == '[' && t[i+1] == ']' ) { // this argument is an array
292 i = i + 2;
293 r.append( "[]" );
294 whitespace();
295 }
296 s = ", ";
297 if ( t[i] == ',' ) {
298 more = true;
299 i++;
300 }
301 else {
302 more = false;
303 }
304 }
305 if ( t[i] != ')' )
306 return "";
307 r.append( " )" );
308 i++;
309 return r;
310 }
311
312
313 /*! Steps the Parser past one character. */
314
step()315 void Parser::step()
316 {
317 i++;
318 }
319
320
321 /*! Returns true if the first unparsed characters of the string are
322 the same as \a pattern, and false if not. */
323
lookingAt(const EString & pattern)324 bool Parser::lookingAt( const EString & pattern )
325 {
326 return t.mid( i, pattern.length() ) == pattern;
327 }
328
329
330 /*! Parses and steps past a single word. If the next nonwhitespace
331 character is not a word character, this function returns an empty
332 string.
333 */
334
word()335 EString Parser::word()
336 {
337 uint j = simpleIdentifier( i );
338 while ( t[j] == '-' ) {
339 uint k = simpleIdentifier( j+1 );
340 if ( k > j + 1 )
341 j = k;
342 }
343 EString r = t.mid( i, j-i ).simplified();
344 if ( !r.isEmpty() )
345 i = j;
346 return r;
347 }
348
349
350 /*! Parses and steps past a single value, which is either a number or
351 an identifier.
352 */
353
value()354 EString Parser::value()
355 {
356 uint j = whitespace( i );
357 if ( t[j] == '-' ||
358 ( t[j] >= '0' && t[j] <= '9' ) ) {
359 uint k = j;
360 if ( t[k] == '-' )
361 k++;
362 while ( t[k] >= '0' && t[k] <= '9' )
363 k++;
364 EString r( t.mid( j, k-j ) );
365 i = k;
366 return r;
367 }
368 return identifier();
369 }
370
371
372 /*! Steps past the whitespace starting at \a j and return the index of
373 the first following nonwhitespace character.
374 */
375
whitespace(uint j)376 uint Parser::whitespace( uint j )
377 {
378 uint k;
379 do {
380 k = j;
381
382 while ( t[j] == 32 || t[j] == 9 || t[j] == 13 || t[j] == 10 )
383 j++;
384
385 if ( t[j] == '/' && t[j+1] == '/' ) {
386 while ( j < t.length() && t[j] != '\n' )
387 j++;
388 }
389 } while ( j > k );
390
391 return j;
392 }
393
394
395 /*! Reads past an operator name starting at \a j and returns the index
396 of the following characters. If \a j does not point to an operator
397 name, operatorHack() returns \a j.
398 */
399
operatorHack(uint j)400 uint Parser::operatorHack( uint j )
401 {
402 uint i, k = j+8;
403 k = whitespace( k );
404
405 // Four possible cases: We're looking at a single character, two
406 // characters, '()', or "EString".
407
408 uint chars = 0;
409
410 if ( t[k] == '(' && t[k+1] == ')' ) {
411 chars = 2;
412 }
413 else if ( ( ( t[k] > ' ' && t[k] < '@' ) ||
414 ( t[k] > 'Z' && t[k] < 'a' ) ) &&
415 !( t[k] >= '0' && t[k] <= '9' ) ) {
416 chars = 1;
417 if ( t[k+1] != '(' &&
418 ( ( t[k+1] > ' ' && t[k+1] < '@' ) ||
419 ( t[k] > 'Z' && t[k] < 'a' ) ) &&
420 !( t[k+1] >= '0' && t[k+1] <= '9' ) )
421 chars = 2;
422 }
423 else if ( ( i = type( k ) ) > k ) {
424 chars = i-k;
425 }
426
427 if ( chars > 0 ) {
428 k = whitespace( k+chars );
429 if ( t[k] == '(' )
430 return k;
431 }
432 return j;
433 }
434