1 /*
2 *   Copyright (c) 2015, Enrico Tröger <enrico.troeger@uvena.de>
3 *
4 *   Loosely based on the PHP tags parser since the syntax is somewhat similar
5 *   regarding variable and function definitions.
6 *
7 *   This source code is released for free distribution under the terms of the
8 *   GNU General Public License.
9 *
10 *   This module contains code for generating tags for Windows PowerShell scripts.
11 */
12 
13 /*
14 *   INCLUDE FILES
15 */
16 #include "general.h"  /* must always come first */
17 #include "debug.h"
18 #include "parse.h"
19 #include "read.h"
20 #include "vstring.h"
21 #include "keyword.h"
22 #include "entry.h"
23 #include "routines.h"
24 #include <string.h>
25 
/* Text inserted between the components of a token's scope string
 * (see addToScope()). */
#define SCOPE_SEPARATOR "::"


/* Access value meaning "no access type": returned by findValidAccessType()
 * when nothing matches, and never stored in a tag entry. */
#define ACCESS_UNDEFINED NULL
/* Canonical access strings handed out by findValidAccessType(). The first
 * slot is the ACCESS_UNDEFINED placeholder, which lookups skip. */
static const char *const accessTypes[] = {
	ACCESS_UNDEFINED,
	"global",
	"local",
	"script",
	"private"
};
37 
/* Tag kinds produced by this parser. The enumerators are used as indices
 * into PowerShellKinds; COUNT_KIND is the number of kinds. */
typedef enum {
	K_FUNCTION,
	K_VARIABLE,
	COUNT_KIND
} powerShellKind;

/* Kind table registered with the parser definition, one entry per
 * powerShellKind value, in the same order as the enum. */
static kindDefinition PowerShellKinds[COUNT_KIND] = {
	{ true, 'f', "function",	"functions" },
	{ true, 'v', "variable",	"variables" }
};
48 
49 
/* Token categories assigned by readToken().
 *
 * TOKEN_KEYWORD is used for the words accepted by isTokenFunction()
 * ("function" and "filter"); TOKEN_VARIABLE is a '$'-prefixed name;
 * TOKEN_UNDEFINED is anything the tokenizer does not classify. */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE
} tokenType;
70 
/* State of the token currently being processed.
 *
 * Instances are created with newToken() and released with deleteToken(),
 * which also frees the two vStrings. */
typedef struct {
	tokenType		type;			/* category set by readToken() */
	vString *		string;			/* token text (identifier/variable name, string contents) */
	vString *		scope;			/* enclosing scope, components joined by SCOPE_SEPARATOR */
	unsigned long	lineNumber;		/* input line number recorded for the token */
	MIOPos			filePosition;	/* input file position recorded for the token */
	int 			parentKind; /* -1 if none */
} tokenInfo;
79 
80 
findValidAccessType(const char * const access)81 static const char *findValidAccessType (const char *const access)
82 {
83 	unsigned int i;
84 	if (access == ACCESS_UNDEFINED)
85 		return ACCESS_UNDEFINED; /* early out to save the for-loop if possible */
86 	for (i = 0; i < ARRAY_SIZE(accessTypes); i++)
87 	{
88 		if (accessTypes[i] == ACCESS_UNDEFINED)
89 			continue;
90 		if (strcasecmp (access, accessTypes[i]) == 0)
91 			return accessTypes[i];
92 		i++;
93 	}
94 	return ACCESS_UNDEFINED;
95 }
96 
/*
 * Fill in a tag entry for the given token.
 *
 * The entry is named after token->string, gets the token's line number and
 * file position, the access string (when not NULL) and, if the token carries
 * a non-empty scope, the scope name together with the parent kind.
 */
static void initPowerShellEntry (tagEntryInfo *const e, const tokenInfo *const token,
								 const powerShellKind kind, const char *const access)
{
	const bool hasScope = vStringLength (token->scope) > 0;

	initTagEntry (e, vStringValue (token->string), kind);
	e->lineNumber = token->lineNumber;
	e->filePosition = token->filePosition;

	if (access != NULL)
		e->extensionFields.access = access;

	if (! hasScope)
		return;

	/* a scoped token is expected to know the kind of its parent */
	Assert (token->parentKind >= 0);
	e->extensionFields.scopeKindIndex = token->parentKind;
	e->extensionFields.scopeName = vStringValue (token->scope);
}
116 
makeSimplePowerShellTag(const tokenInfo * const token,const powerShellKind kind,const char * const access)117 static void makeSimplePowerShellTag (const tokenInfo *const token, const powerShellKind kind,
118 									 const char *const access)
119 {
120 	if (PowerShellKinds[kind].enabled)
121 	{
122 		tagEntryInfo e;
123 
124 		initPowerShellEntry (&e, token, kind, access);
125 		makeTagEntry (&e);
126 	}
127 }
128 
makeFunctionTag(const tokenInfo * const token,const vString * const arglist,const char * const access)129 static void makeFunctionTag (const tokenInfo *const token, const vString *const arglist,
130 							 const char *const access)
131 {
132 	if (PowerShellKinds[K_FUNCTION].enabled)
133 	{
134 		tagEntryInfo e;
135 
136 		initPowerShellEntry (&e, token, K_FUNCTION, access);
137 
138 		if (arglist)
139 			e.extensionFields.signature = vStringValue (arglist);
140 
141 		makeTagEntry (&e);
142 	}
143 }
144 
newToken(void)145 static tokenInfo *newToken (void)
146 {
147 	tokenInfo *const token = xMalloc (1, tokenInfo);
148 
149 	token->type			= TOKEN_UNDEFINED;
150 	token->string		= vStringNew ();
151 	token->scope		= vStringNew ();
152 	token->lineNumber   = getInputLineNumber ();
153 	token->filePosition = getInputFilePosition ();
154 	token->parentKind	= -1;
155 
156 	return token;
157 }
158 
/*
 * Release a token obtained from newToken(): both of its strings first,
 * then the token structure itself.
 */
static void deleteToken (tokenInfo *const token)
{
	vString *const text = token->string;
	vString *const scope = token->scope;

	vStringDelete (text);
	vStringDelete (scope);
	eFree (token);
}
165 
/*
 * Copy type, text, position and parent kind from src into dest.
 * The scope string is copied as well only when scope is true; otherwise
 * dest keeps the scope it already had.
 */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
					   bool scope)
{
	dest->type = src->type;
	dest->parentKind = src->parentKind;
	dest->lineNumber = src->lineNumber;
	dest->filePosition = src->filePosition;
	vStringCopy (dest->string, src->string);

	if (! scope)
		return;

	vStringCopy (dest->scope, src->scope);
}
177 
/*
 * Append extra as a further component of the token's scope, inserting
 * SCOPE_SEPARATOR first when the scope is not empty yet.
 */
static void addToScope (tokenInfo *const token, const vString *const extra)
{
	vString *const path = token->scope;
	const bool needsSeparator = vStringLength (path) > 0;

	if (needsSeparator)
		vStringCatS (path, SCOPE_SEPARATOR);
	vStringCatS (path, vStringValue (extra));
}
184 
/*
 * Tell whether c may appear in an identifier: alphanumerics, ':', '_', '-'
 * and any value of 0x80 or above.
 */
static bool isIdentChar (const int c)
{
	if (isalnum (c))
		return true;

	switch (c)
	{
		case ':':
		case '_':
		case '-':
			return true;

		default:
			return c >= 0x80;
	}
}
189 
/*
 * Read the remainder of a string literal into string.
 *
 * Input is consumed up to and including the closing delimiter (or until
 * EOF). A backslash makes the following character part of the string
 * unconditionally; the backslash itself is not stored.
 */
static void parseString (vString *const string, const int delimiter)
{
	for (;;)
	{
		int ch = getcFromInputFile ();

		if (ch == '\\')
		{
			/* take whatever follows the backslash literally */
			ch = getcFromInputFile ();
			if (ch == EOF)
				break;
			vStringPut (string, (char) ch);
			continue;
		}

		if (ch == EOF || ch == delimiter)
			break;

		vStringPut (string, (char) ch);
	}
}
204 
/*
 * Append an identifier to string: firstChar (already read by the caller)
 * followed by every subsequent identifier character from the input. The
 * first character that does not belong to the identifier is pushed back.
 */
static void parseIdentifier (vString *const string, const int firstChar)
{
	int ch;

	vStringPut (string, (char) firstChar);
	for (ch = getcFromInputFile (); isIdentChar (ch); ch = getcFromInputFile ())
		vStringPut (string, (char) ch);

	ungetcToInputFile (ch);
}
215 
/*
 * Tell whether name is one of the words that introduce a function
 * definition: "function" or "filter", compared case-insensitively.
 */
static bool isTokenFunction (vString *const name)
{
	const char *const word = vStringValue (name);

	if (strcasecmp (word, "function") == 0)
		return true;

	return strcasecmp (word, "filter") == 0;
}
221 
/*
 * Tell whether c is one of the whitespace characters skipped between
 * tokens: space, tab, vertical tab, newline, carriage return, form feed.
 */
static bool isSpace (int c)
{
	switch (c)
	{
		case ' ':
		case '\t':
		case '\v':
		case '\n':
		case '\r':
		case '\f':
			return true;

		default:
			return false;
	}
}
227 
/*
 * Starting with the already-read character c, consume whitespace from the
 * input and return the first character that is not whitespace (possibly
 * c itself, possibly EOF).
 */
static int skipWhitespaces (int c)
{
	int ch;

	for (ch = c; isSpace (ch); ch = getcFromInputFile ())
		; /* discard */

	return ch;
}
234 
/*
 * Consume the rest of the current line, including its terminator.
 *
 * Returns the character that ended the scan: '\n' for LF and CR+LF line
 * endings, '\r' for a lone CR (the character after it is pushed back),
 * or EOF.
 */
static int skipSingleComment (void)
{
	for (;;)
	{
		const int ch = getcFromInputFile ();

		if (ch == EOF || ch == '\n')
			return ch;

		if (ch == '\r')
		{
			const int following = getcFromInputFile ();

			if (following == '\n')
				return following; /* CR+LF counts as one line end */

			ungetcToInputFile (following);
			return ch;
		}
	}
}
252 
/*
 * Read the next token from the input into token.
 *
 * The token's type and text are reset, leading whitespace and comments are
 * skipped, and the line number / file position are recorded once the first
 * significant character has been read. Scope and parent kind of the token
 * are left untouched.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type		= TOKEN_UNDEFINED;
	vStringClear (token->string);

	/* re-entered after a comment has been skipped */
getNextChar:

	c = getcFromInputFile ();
	c = skipWhitespaces (c);

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;					break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case ':': token->type = TOKEN_COLON;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
		case '=': token->type = TOKEN_EQUAL_SIGN;			break;

		case '\'':
		case '"':
			/* string literal: the text between the quotes becomes the token
			 * string, and the position is re-read after the literal has
			 * been consumed */
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			int d = getcFromInputFile ();
			if (d == '#')
			{
				/* <# ... #> multiline comment */
				do
				{
					/* presumably consumes input up to and including the
					 * next '#' - confirm against the helper's definition */
					c = skipToCharacterInInputFile ('#');
					if (c != EOF)
					{
						/* "#>" ends the comment; any other character is
						 * pushed back and the search continues */
						c = getcFromInputFile ();
						if (c == '>')
							break;
						else
							ungetcToInputFile (c);
					}
				} while (c != EOF);
				goto getNextChar;
			}
			else
			{
				/* a '<' that does not start a comment is not classified */
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		case '#': /* comment */
			skipSingleComment ();
			goto getNextChar;
			break;

		case '+':
		case '-':
		case '*':
		case '/':
		case '%':
		{
			/* arithmetic operator, optionally followed by '=' which is
			 * consumed as part of the same token */
			int d = getcFromInputFile ();
			if (d != '=')
				ungetcToInputFile (d);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '$': /* variable start */
		{
			int d = getcFromInputFile ();
			if (! isIdentChar (d))
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				/* the name is stored without the leading '$' */
				parseIdentifier (token->string, d);
				token->type = TOKEN_VARIABLE;
			}
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				/* bare word: either a function-introducing keyword or a
				 * plain identifier */
				parseIdentifier (token->string, c);
				if (isTokenFunction (token->string))
					token->type = TOKEN_KEYWORD;
				else
					token->type = TOKEN_IDENTIFIER;
			}
			break;
	}
}
367 
/* Forward declaration: parseFunction() calls enterScope() for a function
 * body, and enterScope() in turn calls parseFunction(). */
static void enterScope (tokenInfo *const parentToken,
						const vString *const extraScope,
						const int parentKind);
371 
372 /* strip a possible PowerShell scope specification and convert it to accessType */
parsePowerShellScope(tokenInfo * const token)373 static const char *parsePowerShellScope (tokenInfo *const token)
374 {
375 	const char *access = ACCESS_UNDEFINED;
376 	const char *const tokenName = vStringValue (token->string);
377 	const char *powershellScopeEnd;
378 
379 	powershellScopeEnd = strchr (tokenName, ':');
380 	if (powershellScopeEnd)
381 	{
382 		size_t powershellScopeLen;
383 		vString * powershellScope = vStringNew ();
384 
385 		powershellScopeLen = (size_t)(powershellScopeEnd - tokenName);
386 		/* extract the scope */
387 		vStringNCopyS (powershellScope, tokenName, powershellScopeLen);
388 		/* cut the resulting scope string from the identifier */
389 		memmove (token->string->buffer,
390 				 /* +1 to skip the leading colon */
391 				 token->string->buffer + powershellScopeLen + 1,
392 				 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
393 				 token->string->length + 1 - powershellScopeLen - 1);
394 		token->string->length -= powershellScopeLen + 1;
395 
396 		access = findValidAccessType (vStringValue (powershellScope));
397 
398 		vStringDelete (powershellScope);
399 	}
400 	return access;
401 }
402 
403 
404 /* parse a function
405  *
406  * 	function myfunc($foo, $bar) {}
407  */
parseFunction(tokenInfo * const token)408 static bool parseFunction (tokenInfo *const token)
409 {
410 	bool readNext = true;
411 	tokenInfo *nameFree = NULL;
412 	const char *access;
413 
414 	readToken (token);
415 
416 	if (token->type != TOKEN_IDENTIFIER)
417 		return false;
418 
419 	access = parsePowerShellScope (token);
420 
421 	nameFree = newToken ();
422 	copyToken (nameFree, token, true);
423 	readToken (token);
424 
425 	if (token->type == TOKEN_OPEN_PAREN)
426 	{
427 		vString *arglist = vStringNew ();
428 		int depth = 1;
429 
430 		vStringPut (arglist, '(');
431 		do
432 		{
433 			readToken (token);
434 
435 			switch (token->type)
436 			{
437 				case TOKEN_OPEN_PAREN:  depth++; break;
438 				case TOKEN_CLOSE_PAREN: depth--; break;
439 				default: break;
440 			}
441 			/* display part */
442 			switch (token->type)
443 			{
444 				case TOKEN_CLOSE_CURLY:		vStringPut (arglist, '}');		break;
445 				case TOKEN_CLOSE_PAREN:		vStringPut (arglist, ')');		break;
446 				case TOKEN_CLOSE_SQUARE:	vStringPut (arglist, ']');		break;
447 				case TOKEN_COLON:			vStringPut (arglist, ':');		break;
448 				case TOKEN_COMMA:			vStringCatS (arglist, ", ");	break;
449 				case TOKEN_EQUAL_SIGN:		vStringCatS (arglist, " = ");	break;
450 				case TOKEN_OPEN_CURLY:		vStringPut (arglist, '{');		break;
451 				case TOKEN_OPEN_PAREN:		vStringPut (arglist, '(');		break;
452 				case TOKEN_OPEN_SQUARE:		vStringPut (arglist, '[');		break;
453 				case TOKEN_PERIOD:			vStringPut (arglist, '.');		break;
454 				case TOKEN_SEMICOLON:		vStringPut (arglist, ';');		break;
455 				case TOKEN_STRING:			vStringCatS (arglist, "'...'");	break;
456 
457 				case TOKEN_IDENTIFIER:
458 				case TOKEN_KEYWORD:
459 				case TOKEN_VARIABLE:
460 				{
461 					switch (vStringLast (arglist))
462 					{
463 						case 0:
464 						case ' ':
465 						case '{':
466 						case '(':
467 						case '[':
468 						case '.':
469 							/* no need for a space between those and the identifier */
470 							break;
471 
472 						default:
473 							vStringPut (arglist, ' ');
474 							break;
475 					}
476 					if (token->type == TOKEN_VARIABLE)
477 						vStringPut (arglist, '$');
478 					vStringCat (arglist, token->string);
479 					break;
480 				}
481 
482 				default: break;
483 			}
484 		}
485 		while (token->type != TOKEN_EOF && depth > 0);
486 
487 		makeFunctionTag (nameFree, arglist, access);
488 		vStringDelete (arglist);
489 
490 		readToken (token);
491 	}
492 	else if (token->type == TOKEN_OPEN_CURLY)
493 	{	/* filters doesn't need to have an arglist */
494 		makeFunctionTag (nameFree, NULL, access);
495 	}
496 
497 	if (token->type == TOKEN_OPEN_CURLY)
498 		enterScope (token, nameFree->string, K_FUNCTION);
499 	else
500 		readNext = false;
501 
502 	if (nameFree)
503 		deleteToken (nameFree);
504 
505 	return readNext;
506 }
507 
508 /* parses declarations of the form
509  * 	$var = VALUE
510  */
parseVariable(tokenInfo * const token)511 static bool parseVariable (tokenInfo *const token)
512 {
513 	tokenInfo *name;
514 	bool readNext = true;
515 	const char *access;
516 
517 	name = newToken ();
518 	copyToken (name, token, true);
519 
520 	readToken (token);
521 	if (token->type == TOKEN_EQUAL_SIGN)
522 	{
523 		if (token->parentKind != K_FUNCTION)
524 		{	/* ignore local variables (i.e. within a function) */
525 			access = parsePowerShellScope (name);
526 			makeSimplePowerShellTag (name, K_VARIABLE, access);
527 			readNext = true;
528 		}
529 	}
530 	else
531 		readNext = false;
532 
533 	deleteToken (name);
534 
535 	return readNext;
536 }
537 
/*
 * Parse the contents of a scope until its closing '}' or EOF.
 *
 * parentToken supplies the starting state. When extraScope is not NULL it
 * is appended to the scope of everything found inside and parentKind becomes
 * the parent kind of those tags. On return parentToken holds the token that
 * ended the scope, with its own scope string and parent kind unchanged.
 */
static void enterScope (tokenInfo *const parentToken,
						const vString *const extraScope,
						const int parentKind)
{
	const int savedParentKind = parentToken->parentKind;
	tokenInfo *const cursor = newToken ();

	copyToken (cursor, parentToken, true);
	if (extraScope != NULL)
	{
		addToScope (cursor, extraScope);
		cursor->parentKind = parentKind;
	}

	readToken (cursor);
	while (cursor->type != TOKEN_EOF && cursor->type != TOKEN_CLOSE_CURLY)
	{
		/* the sub-parsers report whether they left an unhandled token */
		bool advance = true;

		if (cursor->type == TOKEN_OPEN_CURLY)
			enterScope (cursor, NULL, -1);
		else if (cursor->type == TOKEN_KEYWORD)
			advance = parseFunction (cursor);
		else if (cursor->type == TOKEN_VARIABLE)
			advance = parseVariable (cursor);

		if (advance)
			readToken (cursor);
	}

	/* hand the terminating token back, but keep the parent's scope */
	copyToken (parentToken, cursor, false);
	parentToken->parentKind = savedParentKind;
	deleteToken (cursor);
}
584 
findPowerShellTags(void)585 static void findPowerShellTags (void)
586 {
587 	tokenInfo *const token = newToken ();
588 
589 	do
590 	{
591 		enterScope (token, NULL, -1);
592 	}
593 	while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
594 
595 	deleteToken (token);
596 }
597 
PowerShellParser(void)598 extern parserDefinition* PowerShellParser (void)
599 {
600 	static const char *const extensions [] = { "ps1", "psm1", NULL };
601 	parserDefinition* def = parserNew ("PowerShell");
602 	def->kindTable  = PowerShellKinds;
603 	def->kindCount  = ARRAY_SIZE (PowerShellKinds);
604 	def->extensions = extensions;
605 	def->parser     = findPowerShellTags;
606 	return def;
607 }
608