1 /*
2 * Copyright (c) 2015, Enrico Tröger <enrico.troeger@uvena.de>
3 *
4 * Loosely based on the PHP tags parser since the syntax is somewhat similar
5 * regarding variable and function definitions.
6 *
7 * This source code is released for free distribution under the terms of the
8 * GNU General Public License.
9 *
10 * This module contains code for generating tags for Windows PowerShell scripts.
11 */
12
13 /*
14 * INCLUDE FILES
15 */
16 #include "general.h" /* must always come first */
17 #include "debug.h"
18 #include "parse.h"
19 #include "read.h"
20 #include "vstring.h"
21 #include "keyword.h"
22 #include "entry.h"
23 #include "routines.h"
24 #include <string.h>
25
/* Text inserted between nested scope names when a scope path is built. */
#define SCOPE_SEPARATOR "::"
27
28
/* Marker meaning "no access type given / none recognised". */
#define ACCESS_UNDEFINED NULL

/*
 * Table of access strings that may be attached to a tag.  The first slot
 * holds ACCESS_UNDEFINED so that it is never matched by name; the other
 * entries are the scope modifier names looked up by findValidAccessType().
 */
static const char *const accessTypes[] = {
	ACCESS_UNDEFINED,
	"global",
	"local",
	"script",
	"private"
};
37
/* Tag kinds emitted by this parser; the values index PowerShellKinds[]. */
typedef enum {
	K_FUNCTION = 0,	/* tag for a function/filter definition */
	K_VARIABLE,	/* tag for a variable assignment */
	COUNT_KIND	/* number of kinds, not a kind itself */
} powerShellKind;
43
44 static kindDefinition PowerShellKinds[COUNT_KIND] = {
45 { true, 'f', "function", "functions" },
46 { true, 'v', "variable", "variables" }
47 };
48
49
/* Lexical categories produced by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,	/* anything not classified below */
	TOKEN_EOF,		/* end of input */
	TOKEN_CLOSE_PAREN,	/* ) */
	TOKEN_SEMICOLON,	/* ; */
	TOKEN_COLON,		/* : */
	TOKEN_COMMA,		/* , */
	TOKEN_KEYWORD,		/* identifier accepted by isTokenFunction() */
	TOKEN_OPEN_PAREN,	/* ( */
	TOKEN_OPERATOR,		/* + - * / %, optionally followed by = */
	TOKEN_IDENTIFIER,	/* any other identifier */
	TOKEN_STRING,		/* quoted string */
	TOKEN_PERIOD,		/* . */
	TOKEN_OPEN_CURLY,	/* { */
	TOKEN_CLOSE_CURLY,	/* } */
	TOKEN_EQUAL_SIGN,	/* = */
	TOKEN_OPEN_SQUARE,	/* [ */
	TOKEN_CLOSE_SQUARE,	/* ] */
	TOKEN_VARIABLE		/* $ followed by an identifier */
} tokenType;
70
71 typedef struct {
72 tokenType type;
73 vString * string;
74 vString * scope;
75 unsigned long lineNumber;
76 MIOPos filePosition;
77 int parentKind; /* -1 if none */
78 } tokenInfo;
79
80
findValidAccessType(const char * const access)81 static const char *findValidAccessType (const char *const access)
82 {
83 unsigned int i;
84 if (access == ACCESS_UNDEFINED)
85 return ACCESS_UNDEFINED; /* early out to save the for-loop if possible */
86 for (i = 0; i < ARRAY_SIZE(accessTypes); i++)
87 {
88 if (accessTypes[i] == ACCESS_UNDEFINED)
89 continue;
90 if (strcasecmp (access, accessTypes[i]) == 0)
91 return accessTypes[i];
92 i++;
93 }
94 return ACCESS_UNDEFINED;
95 }
96
/*
 * Fill in a tag entry from a token.
 *
 * e:      entry to initialise.
 * token:  supplies the tag name, line number, file position and scope.
 * kind:   kind of the tag being created.
 * access: access string to attach, or NULL for none.
 */
static void initPowerShellEntry (tagEntryInfo *const e, const tokenInfo *const token,
								 const powerShellKind kind, const char *const access)
{
	initTagEntry (e, vStringValue (token->string), kind);

	/* report the position remembered in the token */
	e->lineNumber   = token->lineNumber;
	e->filePosition = token->filePosition;

	if (access != NULL)
		e->extensionFields.access = access;

	/* scope fields are only set when the token sits inside some scope */
	if (vStringLength (token->scope) > 0)
	{
		const int parentKind = token->parentKind;

		Assert (parentKind >= 0);

		e->extensionFields.scopeName      = vStringValue (token->scope);
		e->extensionFields.scopeKindIndex = parentKind;
	}
}
116
makeSimplePowerShellTag(const tokenInfo * const token,const powerShellKind kind,const char * const access)117 static void makeSimplePowerShellTag (const tokenInfo *const token, const powerShellKind kind,
118 const char *const access)
119 {
120 if (PowerShellKinds[kind].enabled)
121 {
122 tagEntryInfo e;
123
124 initPowerShellEntry (&e, token, kind, access);
125 makeTagEntry (&e);
126 }
127 }
128
makeFunctionTag(const tokenInfo * const token,const vString * const arglist,const char * const access)129 static void makeFunctionTag (const tokenInfo *const token, const vString *const arglist,
130 const char *const access)
131 {
132 if (PowerShellKinds[K_FUNCTION].enabled)
133 {
134 tagEntryInfo e;
135
136 initPowerShellEntry (&e, token, K_FUNCTION, access);
137
138 if (arglist)
139 e.extensionFields.signature = vStringValue (arglist);
140
141 makeTagEntry (&e);
142 }
143 }
144
newToken(void)145 static tokenInfo *newToken (void)
146 {
147 tokenInfo *const token = xMalloc (1, tokenInfo);
148
149 token->type = TOKEN_UNDEFINED;
150 token->string = vStringNew ();
151 token->scope = vStringNew ();
152 token->lineNumber = getInputLineNumber ();
153 token->filePosition = getInputFilePosition ();
154 token->parentKind = -1;
155
156 return token;
157 }
158
/* Free a token obtained from newToken(), including both of its strings. */
static void deleteToken (tokenInfo *const token)
{
	vStringDelete (token->scope);
	vStringDelete (token->string);
	eFree (token);
}
165
/*
 * Copy `src` into `dest`.
 *
 * Type, text, position and parent kind are always copied; the scope string
 * is copied only when `scope` is true, otherwise dest keeps its own scope.
 */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
					   bool scope)
{
	dest->type         = src->type;
	dest->parentKind   = src->parentKind;
	dest->lineNumber   = src->lineNumber;
	dest->filePosition = src->filePosition;
	vStringCopy (dest->string, src->string);

	if (scope)
		vStringCopy (dest->scope, src->scope);
}
177
/*
 * Append `extra` to the token's scope path, inserting SCOPE_SEPARATOR
 * first when the path is not empty.
 */
static void addToScope (tokenInfo *const token, const vString *const extra)
{
	vString *const path = token->scope;

	if (vStringLength (path) > 0)
	{
		vStringCatS (path, SCOPE_SEPARATOR);
	}
	vStringCatS (path, vStringValue (extra));
}
184
/*
 * Tell whether `c` may appear in an identifier: alphanumerics, ':', '_',
 * '-' and any value of 0x80 or above are accepted.
 */
static bool isIdentChar (const int c)
{
	switch (c)
	{
		case ':':
		case '_':
		case '-':
			return true;
		default:
			break;
	}

	if (c >= 0x80)
		return true;

	return isalnum (c) != 0;
}
189
/*
 * Read the body of a quoted string up to its closing delimiter (or EOF)
 * and append it to `string`.  The opening delimiter has already been
 * consumed by the caller; the closing one is consumed here and not stored.
 *
 * string:    receives the string contents.
 * delimiter: the quote character that opened the string (' or ").
 *
 * PowerShell quoting rules are followed:
 *  - a backslash is an ordinary character (it is common in paths such as
 *    "C:\dir\" and must not hide the closing quote);
 *  - inside double quotes a backtick escapes the next character, which is
 *    stored as-is;
 *  - a doubled delimiter ('' or "") stands for one literal quote.
 */
static void parseString (vString *const string, const int delimiter)
{
	while (true)
	{
		int c = getcFromInputFile ();

		if (c == EOF)
			break;

		if (c == '`' && delimiter == '"')
		{
			/* backtick escape: keep the following character verbatim */
			c = getcFromInputFile ();
			if (c == EOF)
				break;
			vStringPut (string, (char) c);
		}
		else if (c == delimiter)
		{
			const int next = getcFromInputFile ();

			if (next != delimiter)
			{
				/* real end of the string */
				ungetcToInputFile (next);
				break;
			}
			/* doubled quote: one literal quote character */
			vStringPut (string, (char) c);
		}
		else
			vStringPut (string, (char) c);
	}
}
204
/*
 * Append an identifier to `string`.
 *
 * firstChar is stored unconditionally; further characters are read from
 * the input for as long as isIdentChar() accepts them.  The first rejected
 * character is pushed back onto the input.
 */
static void parseIdentifier (vString *const string, const int firstChar)
{
	int ch = firstChar;

	for (;;)
	{
		vStringPut (string, (char) ch);
		ch = getcFromInputFile ();
		if (! isIdentChar (ch))
			break;
	}
	ungetcToInputFile (ch);
}
215
/*
 * Tell whether `name` is one of the words that introduce a function
 * definition ("function" or "filter"), compared without regard to case.
 */
static bool isTokenFunction (vString *const name)
{
	const char *const word = vStringValue (name);

	if (strcasecmp (word, "function") == 0)
		return true;
	return strcasecmp (word, "filter") == 0;
}
221
/* Tell whether `c` is a blank: tab, space, vertical tab, LF, CR or form feed. */
static bool isSpace (int c)
{
	switch (c)
	{
		case '\t':
		case ' ':
		case '\v':
		case '\n':
		case '\r':
		case '\f':
			return true;
		default:
			return false;
	}
}
227
/*
 * Starting with the already-read character `c`, consume input while the
 * current character is a blank and return the first non-blank character
 * (which may be EOF).
 */
static int skipWhitespaces (int c)
{
	for (; isSpace (c); c = getcFromInputFile ())
	{
		/* nothing to do, just advance */
	}
	return c;
}
234
/*
 * Discard the rest of the current line.
 *
 * A CR immediately followed by LF is consumed as a single line ending.
 * Returns the character that ended the scan: '\n', a lone '\r', or EOF.
 */
static int skipSingleComment (void)
{
	int ch;

	for (;;)
	{
		ch = getcFromInputFile ();

		if (ch == '\r')
		{
			/* swallow the LF of a CRLF pair, otherwise put the char back */
			const int peek = getcFromInputFile ();

			if (peek == '\n')
				ch = peek;
			else
				ungetcToInputFile (peek);
		}

		if (ch == EOF || ch == '\n' || ch == '\r')
			break;
	}
	return ch;
}
252
/*
 * Read the next token from the input into `token`.
 *
 * The token's type and text are reset first; its line number and file
 * position are refreshed once leading whitespace has been skipped (and
 * once more after a string has been read).  Comments, both "# ..." and
 * "<# ... #>", are skipped and scanning restarts after them.  The scope
 * and parentKind members are left untouched.
 */
static void readToken (tokenInfo *const token)
{
	bool rescan;

	token->type = TOKEN_UNDEFINED;
	vStringClear (token->string);

	do
	{
		const int ch = skipWhitespaces (getcFromInputFile ());

		rescan = false;
		token->lineNumber   = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();

		switch (ch)
		{
			/* single-character tokens */
			case EOF: token->type = TOKEN_EOF;          break;
			case '(': token->type = TOKEN_OPEN_PAREN;   break;
			case ')': token->type = TOKEN_CLOSE_PAREN;  break;
			case ';': token->type = TOKEN_SEMICOLON;    break;
			case ',': token->type = TOKEN_COMMA;        break;
			case '.': token->type = TOKEN_PERIOD;       break;
			case ':': token->type = TOKEN_COLON;        break;
			case '{': token->type = TOKEN_OPEN_CURLY;   break;
			case '}': token->type = TOKEN_CLOSE_CURLY;  break;
			case '[': token->type = TOKEN_OPEN_SQUARE;  break;
			case ']': token->type = TOKEN_CLOSE_SQUARE; break;
			case '=': token->type = TOKEN_EQUAL_SIGN;   break;

			case '\'':
			case '"':
				token->type = TOKEN_STRING;
				parseString (token->string, ch);
				/* the position is taken again after the string was read */
				token->lineNumber   = getInputLineNumber ();
				token->filePosition = getInputFilePosition ();
				break;

			case '<':
			{
				const int follow = getcFromInputFile ();

				if (follow != '#')
				{
					/* a plain '<' is not a token we care about */
					ungetcToInputFile (follow);
					token->type = TOKEN_UNDEFINED;
					break;
				}

				/* <# ... #> multiline comment: look for '#' followed by '>' */
				for (;;)
				{
					int inner = skipToCharacterInInputFile ('#');

					if (inner == EOF)
						break;
					inner = getcFromInputFile ();
					if (inner == '>')
						break;
					ungetcToInputFile (inner);
					if (inner == EOF)
						break;
				}
				rescan = true;
				break;
			}

			case '#': /* comment up to the end of the line */
				skipSingleComment ();
				rescan = true;
				break;

			case '+':
			case '-':
			case '*':
			case '/':
			case '%':
			{
				/* an immediately following '=' belongs to the operator */
				const int follow = getcFromInputFile ();

				if (follow != '=')
					ungetcToInputFile (follow);
				token->type = TOKEN_OPERATOR;
				break;
			}

			case '$': /* variable start */
			{
				const int follow = getcFromInputFile ();

				if (isIdentChar (follow))
				{
					/* the '$' itself is not stored in the token text */
					parseIdentifier (token->string, follow);
					token->type = TOKEN_VARIABLE;
				}
				else
				{
					ungetcToInputFile (follow);
					token->type = TOKEN_UNDEFINED;
				}
				break;
			}

			default:
				if (isIdentChar (ch))
				{
					parseIdentifier (token->string, ch);
					token->type = isTokenFunction (token->string)
						? TOKEN_KEYWORD
						: TOKEN_IDENTIFIER;
				}
				else
					token->type = TOKEN_UNDEFINED;
				break;
		}
	}
	while (rescan);
}
367
368 static void enterScope (tokenInfo *const parentToken,
369 const vString *const extraScope,
370 const int parentKind);
371
372 /* strip a possible PowerShell scope specification and convert it to accessType */
parsePowerShellScope(tokenInfo * const token)373 static const char *parsePowerShellScope (tokenInfo *const token)
374 {
375 const char *access = ACCESS_UNDEFINED;
376 const char *const tokenName = vStringValue (token->string);
377 const char *powershellScopeEnd;
378
379 powershellScopeEnd = strchr (tokenName, ':');
380 if (powershellScopeEnd)
381 {
382 size_t powershellScopeLen;
383 vString * powershellScope = vStringNew ();
384
385 powershellScopeLen = (size_t)(powershellScopeEnd - tokenName);
386 /* extract the scope */
387 vStringNCopyS (powershellScope, tokenName, powershellScopeLen);
388 /* cut the resulting scope string from the identifier */
389 memmove (token->string->buffer,
390 /* +1 to skip the leading colon */
391 token->string->buffer + powershellScopeLen + 1,
392 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
393 token->string->length + 1 - powershellScopeLen - 1);
394 token->string->length -= powershellScopeLen + 1;
395
396 access = findValidAccessType (vStringValue (powershellScope));
397
398 vStringDelete (powershellScope);
399 }
400 return access;
401 }
402
403
404 /* parse a function
405 *
406 * function myfunc($foo, $bar) {}
407 */
parseFunction(tokenInfo * const token)408 static bool parseFunction (tokenInfo *const token)
409 {
410 bool readNext = true;
411 tokenInfo *nameFree = NULL;
412 const char *access;
413
414 readToken (token);
415
416 if (token->type != TOKEN_IDENTIFIER)
417 return false;
418
419 access = parsePowerShellScope (token);
420
421 nameFree = newToken ();
422 copyToken (nameFree, token, true);
423 readToken (token);
424
425 if (token->type == TOKEN_OPEN_PAREN)
426 {
427 vString *arglist = vStringNew ();
428 int depth = 1;
429
430 vStringPut (arglist, '(');
431 do
432 {
433 readToken (token);
434
435 switch (token->type)
436 {
437 case TOKEN_OPEN_PAREN: depth++; break;
438 case TOKEN_CLOSE_PAREN: depth--; break;
439 default: break;
440 }
441 /* display part */
442 switch (token->type)
443 {
444 case TOKEN_CLOSE_CURLY: vStringPut (arglist, '}'); break;
445 case TOKEN_CLOSE_PAREN: vStringPut (arglist, ')'); break;
446 case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']'); break;
447 case TOKEN_COLON: vStringPut (arglist, ':'); break;
448 case TOKEN_COMMA: vStringCatS (arglist, ", "); break;
449 case TOKEN_EQUAL_SIGN: vStringCatS (arglist, " = "); break;
450 case TOKEN_OPEN_CURLY: vStringPut (arglist, '{'); break;
451 case TOKEN_OPEN_PAREN: vStringPut (arglist, '('); break;
452 case TOKEN_OPEN_SQUARE: vStringPut (arglist, '['); break;
453 case TOKEN_PERIOD: vStringPut (arglist, '.'); break;
454 case TOKEN_SEMICOLON: vStringPut (arglist, ';'); break;
455 case TOKEN_STRING: vStringCatS (arglist, "'...'"); break;
456
457 case TOKEN_IDENTIFIER:
458 case TOKEN_KEYWORD:
459 case TOKEN_VARIABLE:
460 {
461 switch (vStringLast (arglist))
462 {
463 case 0:
464 case ' ':
465 case '{':
466 case '(':
467 case '[':
468 case '.':
469 /* no need for a space between those and the identifier */
470 break;
471
472 default:
473 vStringPut (arglist, ' ');
474 break;
475 }
476 if (token->type == TOKEN_VARIABLE)
477 vStringPut (arglist, '$');
478 vStringCat (arglist, token->string);
479 break;
480 }
481
482 default: break;
483 }
484 }
485 while (token->type != TOKEN_EOF && depth > 0);
486
487 makeFunctionTag (nameFree, arglist, access);
488 vStringDelete (arglist);
489
490 readToken (token);
491 }
492 else if (token->type == TOKEN_OPEN_CURLY)
493 { /* filters doesn't need to have an arglist */
494 makeFunctionTag (nameFree, NULL, access);
495 }
496
497 if (token->type == TOKEN_OPEN_CURLY)
498 enterScope (token, nameFree->string, K_FUNCTION);
499 else
500 readNext = false;
501
502 if (nameFree)
503 deleteToken (nameFree);
504
505 return readNext;
506 }
507
508 /* parses declarations of the form
509 * $var = VALUE
510 */
parseVariable(tokenInfo * const token)511 static bool parseVariable (tokenInfo *const token)
512 {
513 tokenInfo *name;
514 bool readNext = true;
515 const char *access;
516
517 name = newToken ();
518 copyToken (name, token, true);
519
520 readToken (token);
521 if (token->type == TOKEN_EQUAL_SIGN)
522 {
523 if (token->parentKind != K_FUNCTION)
524 { /* ignore local variables (i.e. within a function) */
525 access = parsePowerShellScope (name);
526 makeSimplePowerShellTag (name, K_VARIABLE, access);
527 readNext = true;
528 }
529 }
530 else
531 readNext = false;
532
533 deleteToken (name);
534
535 return readNext;
536 }
537
/*
 * Parse tokens until the closing '}' of the current block or EOF.
 *
 * parentToken: token state of the caller; on return it receives the last
 *              token read here (type, text, position) while keeping its
 *              own scope string and its original parentKind.
 * extraScope:  name to append to the scope path for this block, or NULL to
 *              keep the caller's scope and parent kind.
 * parentKind:  kind recorded as parent for tags created inside the block;
 *              only used when extraScope is given.
 */
static void enterScope (tokenInfo *const parentToken,
						const vString *const extraScope,
						const int parentKind)
{
	tokenInfo *const inner = newToken ();
	const int savedParentKind = parentToken->parentKind;

	copyToken (inner, parentToken, true);

	if (extraScope)
	{
		addToScope (inner, extraScope);
		inner->parentKind = parentKind;
	}

	readToken (inner);
	while (! (inner->type == TOKEN_EOF || inner->type == TOKEN_CLOSE_CURLY))
	{
		bool advance;

		switch (inner->type)
		{
			case TOKEN_OPEN_CURLY:
				/* anonymous nested block */
				enterScope (inner, NULL, -1);
				advance = true;
				break;

			case TOKEN_KEYWORD:
				advance = parseFunction (inner);
				break;

			case TOKEN_VARIABLE:
				advance = parseVariable (inner);
				break;

			default:
				advance = true;
				break;
		}

		/* the sub-parsers may already have left an unprocessed token */
		if (advance)
			readToken (inner);
	}

	/* hand the terminating token back, but not our scope or parent kind */
	copyToken (parentToken, inner, false);
	parentToken->parentKind = savedParentKind;
	deleteToken (inner);
}
584
findPowerShellTags(void)585 static void findPowerShellTags (void)
586 {
587 tokenInfo *const token = newToken ();
588
589 do
590 {
591 enterScope (token, NULL, -1);
592 }
593 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
594
595 deleteToken (token);
596 }
597
PowerShellParser(void)598 extern parserDefinition* PowerShellParser (void)
599 {
600 static const char *const extensions [] = { "ps1", "psm1", NULL };
601 parserDefinition* def = parserNew ("PowerShell");
602 def->kindTable = PowerShellKinds;
603 def->kindCount = ARRAY_SIZE (PowerShellKinds);
604 def->extensions = extensions;
605 def->parser = findPowerShellTags;
606 return def;
607 }
608