1 /*
2 *   Copyright (c) 2009, Vincent Berthoux
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains functions for generating tags for Objective Caml
8 *   language files.
9 */
10 /*
11 *   INCLUDE FILES
12 */
13 #include "general.h"	/* must always come first */
14 
15 #include <string.h>
16 
17 #include "debug.h"
18 #include "entry.h"
19 #include "keyword.h"
20 #include "options.h"
21 #include "parse.h"
22 #include "read.h"
23 #include "routines.h"
24 #include "vstring.h"
25 
26 #define OCAML_MAX_STACK_SIZE 256
27 
28 typedef enum {
29 	K_CLASS,        /* OCaml class, relatively rare */
30 	K_METHOD,       /* class method */
31 	K_MODULE,       /* OCaml module OR functor */
32 	K_VARIABLE,
33 	K_VAL,
34 	K_TYPE,         /* name of an OCaml type */
35 	K_FUNCTION,
36 	K_CONSTRUCTOR,  /* Constructor of a sum type */
37 	K_RECORDFIELD,
38 	K_EXCEPTION,
39 } ocamlKind;
40 
41 static kindDefinition OcamlKinds[] = {
42 	{true, 'c', "class", "classes"},
43 	{true, 'm', "method", "Object's method"},
44 	{true, 'M', "module", "Module or functor"},
45 	{true, 'v', "var", "Global variable"},
46 	{true, 'p', "val", "Signature item"},
47 	{true, 't', "type", "Type name"},
48 	{true, 'f', "function", "A function"},
49 	{true, 'C', "Constructor", "A constructor"},
50 	{true, 'r', "RecordField", "A 'structure' field"},
51 	{true, 'e', "Exception", "An exception"},
52 };
53 
54 typedef enum {
55 	OcaKEYWORD_and,
56 	OcaKEYWORD_begin,
57 	OcaKEYWORD_class,
58 	OcaKEYWORD_do,
59 	OcaKEYWORD_done,
60 	OcaKEYWORD_else,
61 	OcaKEYWORD_end,
62 	OcaKEYWORD_exception,
63 	OcaKEYWORD_for,
64 	OcaKEYWORD_functor,
65 	OcaKEYWORD_fun,
66 	OcaKEYWORD_function,
67 	OcaKEYWORD_if,
68 	OcaKEYWORD_in,
69 	OcaKEYWORD_let,
70 	OcaKEYWORD_value,
71 	OcaKEYWORD_match,
72 	OcaKEYWORD_method,
73 	OcaKEYWORD_module,
74 	OcaKEYWORD_mutable,
75 	OcaKEYWORD_object,
76 	OcaKEYWORD_of,
77 	OcaKEYWORD_rec,
78 	OcaKEYWORD_sig,
79 	OcaKEYWORD_struct,
80 	OcaKEYWORD_then,
81 	OcaKEYWORD_try,
82 	OcaKEYWORD_type,
83 	OcaKEYWORD_val,
84 	OcaKEYWORD_virtual,
85 	OcaKEYWORD_while,
86 	OcaKEYWORD_with,
87 
88 	OcaIDENTIFIER,
89 	Tok_PARL,       /* '(' */
90 	Tok_PARR,       /* ')' */
91 	Tok_BRL,        /* '[' */
92 	Tok_BRR,        /* ']' */
93 	Tok_CurlL,      /* '{' */
94 	Tok_CurlR,      /* '}' */
95 	Tok_Prime,      /* '\'' */
96 	Tok_Pipe,       /* '|' */
97 	Tok_EQ,         /* '=' */
98 	Tok_Val,        /* string/number/poo */
99 	Tok_Op,         /* any operator recognized by the language */
100 	Tok_semi,       /* ';' */
101 	Tok_comma,      /* ',' */
102 	Tok_To,         /* '->' */
103 	Tok_Of,         /* ':' */
104 	Tok_Sharp,      /* '#' */
105 	Tok_Backslash,  /* '\\' */
106 
107 	Tok_EOF         /* END of file */
108 } ocamlKeyword;
109 
110 typedef struct sOcaKeywordDesc {
111 	const char *name;
112 	ocamlKeyword id;
113 } ocaKeywordDesc;
114 
115 typedef ocamlKeyword ocaToken;
116 
117 static const keywordTable OcamlKeywordTable[] = {
118 	{ "and"       , OcaKEYWORD_and       },
119 	{ "begin"     , OcaKEYWORD_begin     },
120 	{ "class"     , OcaKEYWORD_class     },
121 	{ "do"        , OcaKEYWORD_do        },
122 	{ "done"      , OcaKEYWORD_done      },
123 	{ "else"      , OcaKEYWORD_else      },
124 	{ "end"       , OcaKEYWORD_end       },
125 	{ "exception" , OcaKEYWORD_exception },
126 	{ "for"       , OcaKEYWORD_for       },
127 	{ "fun"       , OcaKEYWORD_fun       },
128 	{ "function"  , OcaKEYWORD_fun       },
129 	{ "functor"   , OcaKEYWORD_functor   },
130 	{ "if"        , OcaKEYWORD_if        },
131 	{ "in"        , OcaKEYWORD_in        },
132 	{ "let"       , OcaKEYWORD_let       },
133 	{ "match"     , OcaKEYWORD_match     },
134 	{ "method"    , OcaKEYWORD_method    },
135 	{ "module"    , OcaKEYWORD_module    },
136 	{ "mutable"   , OcaKEYWORD_mutable   },
137 	{ "object"    , OcaKEYWORD_object    },
138 	{ "of"        , OcaKEYWORD_of        },
139 	{ "rec"       , OcaKEYWORD_rec       },
140 	{ "sig"       , OcaKEYWORD_sig       },
141 	{ "struct"    , OcaKEYWORD_struct    },
142 	{ "then"      , OcaKEYWORD_then      },
143 	{ "try"       , OcaKEYWORD_try       },
144 	{ "type"      , OcaKEYWORD_type      },
145 	{ "val"       , OcaKEYWORD_val       },
146 	{ "value"     , OcaKEYWORD_value     }, /* just to handle revised syntax */
147 	{ "virtual"   , OcaKEYWORD_virtual   },
148 	{ "while"     , OcaKEYWORD_while     },
149 	{ "with"      , OcaKEYWORD_with      },
150 
151 	{ "or"        , Tok_Op               },
152 	{ "mod "      , Tok_Op               },
153 	{ "land "     , Tok_Op               },
154 	{ "lor "      , Tok_Op               },
155 	{ "lxor "     , Tok_Op               },
156 	{ "lsl "      , Tok_Op               },
157 	{ "lsr "      , Tok_Op               },
158 	{ "asr"       , Tok_Op               },
159 	{ "->"        , Tok_To               },
160 	{ ":"         , Tok_Of               },
161 	{ "true"      , Tok_Val              },
162 	{ "false"     , Tok_Val              }
163 };
164 
165 static langType Lang_Ocaml;
166 
167 static bool exportLocalInfo = false;
168 
169 /*//////////////////////////////////////////////////////////////////
170 //// lexingInit             */
171 typedef struct _lexingState {
172 	vString *name;	/* current parsed identifier/operator */
173 	const unsigned char *cp;	/* position in stream */
174 } lexingState;
175 
176 /* array of the size of all possible value for a char */
177 static bool isOperator[1 << (8 * sizeof (char))] = { false };
178 
179 /* definition of all the operator in OCaml,
180  * /!\ certain operator get special treatment
181  * in regards of their role in OCaml grammar :
182  * '|' ':' '=' '~' and '?' */
initOperatorTable(void)183 static void initOperatorTable ( void )
184 {
185 	isOperator['!'] = true;
186 	isOperator['$'] = true;
187 	isOperator['%'] = true;
188 	isOperator['&'] = true;
189 	isOperator['*'] = true;
190 	isOperator['+'] = true;
191 	isOperator['-'] = true;
192 	isOperator['.'] = true;
193 	isOperator['/'] = true;
194 	isOperator[':'] = true;
195 	isOperator['<'] = true;
196 	isOperator['='] = true;
197 	isOperator['>'] = true;
198 	isOperator['?'] = true;
199 	isOperator['@'] = true;
200 	isOperator['^'] = true;
201 	isOperator['~'] = true;
202 	isOperator['|'] = true;
203 }
204 
205 /*//////////////////////////////////////////////////////////////////////
206 //// Lexing                                     */
isNum(char c)207 static bool isNum (char c)
208 {
209 	return c >= '0' && c <= '9';
210 }
211 
isLowerAlpha(char c)212 static bool isLowerAlpha (char c)
213 {
214 	return c >= 'a' && c <= 'z';
215 }
216 
isUpperAlpha(char c)217 static bool isUpperAlpha (char c)
218 {
219 	return c >= 'A' && c <= 'Z';
220 }
221 
isAlpha(char c)222 static bool isAlpha (char c)
223 {
224 	return isLowerAlpha (c) || isUpperAlpha (c);
225 }
226 
isIdent(char c)227 static bool isIdent (char c)
228 {
229 	return isNum (c) || isAlpha (c) || c == '_' || c == '\'';
230 }
231 
isSpace(char c)232 static bool isSpace (char c)
233 {
234 	return c == ' ' || c == '\t' || c == '\r' || c == '\n';
235 }
236 
eatWhiteSpace(lexingState * st)237 static void eatWhiteSpace (lexingState * st)
238 {
239 	const unsigned char *cp = st->cp;
240 	while (isSpace (*cp))
241 		cp++;
242 
243 	st->cp = cp;
244 }
245 
eatString(lexingState * st)246 static void eatString (lexingState * st)
247 {
248 	bool lastIsBackSlash = false;
249 	bool unfinished = true;
250 	const unsigned char *c = st->cp + 1;
251 
252 	while (unfinished)
253 	{
254 		/* end of line should never happen.
255 		 * we tolerate it */
256 		if (c == NULL || c[0] == '\0')
257 			break;
258 		else if (*c == '"' && !lastIsBackSlash)
259 			unfinished = false;
260 		else
261 			lastIsBackSlash = *c == '\\';
262 
263 		c++;
264 	}
265 
266 	st->cp = c;
267 }
268 
eatComment(lexingState * st)269 static void eatComment (lexingState * st)
270 {
271 	bool unfinished = true;
272 	bool lastIsStar = false;
273 	const unsigned char *c = st->cp + 2;
274 
275 	while (unfinished)
276 	{
277 		/* we've reached the end of the line..
278 		 * so we have to reload a line... */
279 		if (c == NULL || *c == '\0')
280 		{
281 			st->cp = readLineFromInputFile ();
282 			/* WOOPS... no more input...
283 			 * we return, next lexing read
284 			 * will be null and ok */
285 			if (st->cp == NULL)
286 				return;
287 			c = st->cp;
288 		}
289 		/* we've reached the end of the comment */
290 		else if (*c == ')' && lastIsStar)
291 		{
292 			unfinished = false;
293 			c++;
294 		}
295 		/* here we deal with imbricated comment, which
296 		 * are allowed in OCaml */
297 		else if (c[0] == '(' && c[1] == '*')
298 		{
299 			st->cp = c;
300 			eatComment (st);
301 
302 			c = st->cp;
303 			if (c == NULL)
304 				return;
305 
306 			lastIsStar = false;
307 			c++;
308 		}
309 		/* OCaml has a rule which says :
310 		 *
311 		 *   "Comments do not occur inside string or character literals.
312 		 *    Nested comments are handled correctly."
313 		 *
314 		 * So if we encounter a string beginning, we must parse it to
315 		 * get a good comment nesting (bug ID: 3117537)
316 		 */
317 		else if (*c == '"')
318 		{
319 			st->cp = c;
320 			eatString (st);
321 			c = st->cp;
322 		}
323 		else
324 		{
325 			lastIsStar = '*' == *c;
326 			c++;
327 		}
328 	}
329 
330 	st->cp = c;
331 }
332 
readIdentifier(lexingState * st)333 static void readIdentifier (lexingState * st)
334 {
335 	const unsigned char *p;
336 	vStringClear (st->name);
337 
338 	/* first char is a simple letter */
339 	if (isAlpha (*st->cp) || *st->cp == '_')
340 		vStringPut (st->name, (int) *st->cp);
341 
342 	/* Go till you get identifier chars */
343 	for (p = st->cp + 1; isIdent (*p); p++)
344 		vStringPut (st->name, (int) *p);
345 
346 	st->cp = p;
347 }
348 
eatNumber(lexingState * st)349 static ocamlKeyword eatNumber (lexingState * st)
350 {
351 	while (isNum (*st->cp))
352 		st->cp++;
353 	return Tok_Val;
354 }
355 
356 /* Operator can be defined in OCaml as a function
357  * so we must be ample enough to parse them normally */
eatOperator(lexingState * st)358 static ocamlKeyword eatOperator (lexingState * st)
359 {
360 	int count = 0;
361 	const unsigned char *root = st->cp;
362 
363 	vStringClear (st->name);
364 
365 	while (isOperator[st->cp[count]])
366 	{
367 		vStringPut (st->name, st->cp[count]);
368 		count++;
369 	}
370 
371 	st->cp += count;
372 	if (count <= 1)
373 	{
374 		switch (root[0])
375 		{
376 		case '|':
377 			return Tok_Pipe;
378 		case '=':
379 			return Tok_EQ;
380 		case ':':
381 			return Tok_Of;
382 		default:
383 			return Tok_Op;
384 		}
385 	}
386 	else if (count == 2 && root[0] == '-' && root[1] == '>')
387 		return Tok_To;
388 	else if (count == 2 && root[0] == '|' && root[1] == '>')
389 		return Tok_Op;
390 	else
391 		return Tok_Op;
392 }
393 
394 /* The lexer is in charge of reading the file.
395  * Some of sub-lexer (like eatComment) also read file.
396  * lexing is finished when the lexer return Tok_EOF */
lex(lexingState * st)397 static ocamlKeyword lex (lexingState * st)
398 {
399 	int retType;
400 	/* handling data input here */
401 	while (st->cp == NULL || st->cp[0] == '\0')
402 	{
403 		st->cp = readLineFromInputFile ();
404 		if (st->cp == NULL)
405 			return Tok_EOF;
406 	}
407 
408 	if (isAlpha (*st->cp))
409 	{
410 		readIdentifier (st);
411 		retType = lookupKeyword (vStringValue (st->name), Lang_Ocaml);
412 
413 		if (retType == -1)	/* If it's not a keyword */
414 		{
415 			return OcaIDENTIFIER;
416 		}
417 		else
418 		{
419 			return retType;
420 		}
421 	}
422 	else if (isNum (*st->cp))
423 		return eatNumber (st);
424 	else if (isSpace (*st->cp))
425 	{
426 		eatWhiteSpace (st);
427 		return lex (st);
428 	}
429 	else if (*st->cp == '_')
430 	{	// special
431 		readIdentifier (st);
432 		return Tok_Val;
433 	}
434 
435 	/* OCaml permit the definition of our own operators
436 	 * so here we check all the consecutive chars which
437 	 * are operators to discard them. */
438 	else if (isOperator[*st->cp])
439 		return eatOperator (st);
440 	else
441 	{
442 		switch (*st->cp)
443 		{
444 		case '(':
445 			if (st->cp[1] == '*')	/* ergl, a comment */
446 			{
447 				eatComment (st);
448 				return lex (st);
449 			}
450 			else
451 			{
452 				st->cp++;
453 				return Tok_PARL;
454 			}
455 
456 		case ')':
457 			st->cp++;
458 			return Tok_PARR;
459 		case '[':
460 			st->cp++;
461 			return Tok_BRL;
462 		case ']':
463 			st->cp++;
464 			return Tok_BRR;
465 		case '{':
466 			st->cp++;
467 			return Tok_CurlL;
468 		case '}':
469 			st->cp++;
470 			return Tok_CurlR;
471 		case '\'':
472 			st->cp++;
473 			return Tok_Prime;
474 		case ',':
475 			st->cp++;
476 			return Tok_comma;
477 		case '=':
478 			st->cp++;
479 			return Tok_EQ;
480 		case ';':
481 			st->cp++;
482 			return Tok_semi;
483 		case '"':
484 			eatString (st);
485 			return Tok_Val;
486 		case '#':
487 			st->cp++;
488 			return Tok_Sharp;
489 		case '\\':
490 			st->cp++;
491 			return Tok_Backslash;
492 		default:
493 			st->cp++;
494 			break;
495 		}
496 	}
497 	/* default return if nothing is recognized,
498 	 * shouldn't happen, but at least, it will
499 	 * be handled without destroying the parsing. */
500 	return Tok_Val;
501 }
502 
503 /*//////////////////////////////////////////////////////////////////////
504 //// Parsing                                    */
505 typedef void (*parseNext) (vString * const ident, ocaToken what, ocaToken whatNext);
506 
507 /********** Helpers */
508 /* This variable hold the 'parser' which is going to
509  * handle the next token */
510 static parseNext toDoNext;
511 
512 /* Special variable used by parser eater to
513  * determine which action to put after their
514  * job is finished. */
515 static parseNext comeAfter;
516 
517 /* If a token put an end to current declaration/
518  * statement */
519 static ocaToken terminatingToken;
520 
521 /* Token to be searched by the different
522  * parser eater. */
523 static ocaToken waitedToken;
524 
525 /* name of the last class, used for
526  * context stacking. */
527 static vString *lastClass;
528 
529 typedef enum _sContextKind {
530 	ContextStrong,
531 	ContextSoft
532 } contextKind;
533 
534 typedef enum _sContextType {
535 	ContextType,
536 	ContextModule,
537 	ContextClass,
538 	ContextValue,
539 	ContextFunction,
540 	ContextMethod,
541 	ContextBlock,
542 	ContextMatch
543 } contextType;
544 
545 typedef struct _sOcamlContext {
546 	contextKind kind;	/* well if the context is strong or not */
547 	contextType type;
548 	parseNext callback;	/* what to do when a context is pop'd */
549 	vString *contextName;	/* name, if any, of the surrounding context */
550 } ocamlContext;
551 
552 /* context stack, can be used to output scope information
553  * into the tag file. */
554 static ocamlContext stack[OCAML_MAX_STACK_SIZE];
555 /* current position in the tag */
556 static int stackIndex;
557 
558 /* special function, often recalled, so putting it here */
559 static void globalScope (vString * const ident, ocaToken what, ocaToken whatNext);
560 
561 /* Return : index of the last named context if one
562  *          is found, -1 otherwise */
getLastNamedIndex(void)563 static int getLastNamedIndex ( void )
564 {
565 	int i;
566 
567 	for (i = stackIndex - 1; i >= 0; --i)
568 	{
569 		if (vStringLength (stack[i].contextName) > 0)
570 		{
571 			return i;
572 		}
573 	}
574 
575 	return -1;
576 }
577 
contextDescription(contextType t)578 static int contextDescription (contextType t)
579 {
580 	switch (t)
581 	{
582 	case ContextFunction:
583 		return K_FUNCTION;
584 	case ContextMethod:
585 		return K_METHOD;
586 	case ContextValue:
587 		return K_VAL;
588 	case ContextModule:
589 		return K_MODULE;
590 	case ContextType:
591 		return K_TYPE;
592 	case ContextClass:
593 		return K_CLASS;
594 	default:
595 		AssertNotReached();
596 		return KIND_GHOST_INDEX;
597 	}
598 }
599 
contextTypeSuffix(contextType t)600 static char contextTypeSuffix (contextType t)
601 {
602 	switch (t)
603 	{
604 	case ContextFunction:
605 	case ContextMethod:
606 	case ContextValue:
607 	case ContextModule:
608 		return '/';
609 	case ContextType:
610 		return '.';
611 	case ContextClass:
612 		return '#';
613 	case ContextBlock:
614 		return ' ';
615 	case ContextMatch:
616 		return '|';
617 	default:
618 		return '$';
619 	}
620 }
621 
622 /* Push a new context, handle null string */
pushContext(contextKind kind,contextType type,parseNext after,vString const * contextName)623 static void pushContext (contextKind kind, contextType type, parseNext after,
624 	vString const *contextName)
625 {
626 	int parentIndex;
627 
628 	if (stackIndex >= OCAML_MAX_STACK_SIZE)
629 	{
630 		verbose ("OCaml Maximum depth reached");
631 		return;
632 	}
633 
634 	stack[stackIndex].kind = kind;
635 	stack[stackIndex].type = type;
636 	stack[stackIndex].callback = after;
637 
638 	parentIndex = getLastNamedIndex ();
639 	if (contextName == NULL)
640 	{
641 		vStringClear (stack[stackIndex++].contextName);
642 		return;
643 	}
644 
645 	if (parentIndex >= 0)
646 	{
647 		vStringCopy (stack[stackIndex].contextName,
648 			stack[parentIndex].contextName);
649 		vStringPut (stack[stackIndex].contextName,
650 			contextTypeSuffix (stack[parentIndex].type));
651 
652 		vStringCat (stack[stackIndex].contextName, contextName);
653 	}
654 	else
655 		vStringCopy (stack[stackIndex].contextName, contextName);
656 
657 	stackIndex++;
658 }
659 
pushStrongContext(vString * name,contextType type)660 static void pushStrongContext (vString * name, contextType type)
661 {
662 	pushContext (ContextStrong, type, &globalScope, name);
663 }
664 
pushSoftContext(parseNext continuation,vString * name,contextType type)665 static void pushSoftContext (parseNext continuation,
666 	vString * name, contextType type)
667 {
668 	pushContext (ContextSoft, type, continuation, name);
669 }
670 
pushEmptyContext(parseNext continuation)671 static void pushEmptyContext (parseNext continuation)
672 {
673 	pushContext (ContextSoft, ContextValue, continuation, NULL);
674 }
675 
676 /* unroll the stack until the last named context.
677  * then discard it. Used to handle the :
678  * let f x y = ...
679  * in ...
680  * where the context is reseted after the in. Context may have
681  * been really nested before that. */
popLastNamed(void)682 static void popLastNamed ( void )
683 {
684 	int i = getLastNamedIndex ();
685 
686 	if (i >= 0)
687 	{
688 		stackIndex = i;
689 		toDoNext = stack[i].callback;
690 		vStringClear (stack[i].contextName);
691 	}
692 	else
693 	{
694 		/* ok, no named context found...
695 		 * (should not happen). */
696 		stackIndex = 0;
697 		toDoNext = &globalScope;
698 	}
699 }
700 
701 /* pop a context without regarding it's content
702  * (beside handling empty stack case) */
popSoftContext(void)703 static void popSoftContext ( void )
704 {
705 	if (stackIndex <= 0)
706 	{
707 		toDoNext = &globalScope;
708 	}
709 	else
710 	{
711 		stackIndex--;
712 		toDoNext = stack[stackIndex].callback;
713 		vStringClear (stack[stackIndex].contextName);
714 	}
715 }
716 
717 /* Reset everything until the last global space.
718  * a strong context can be :
719  * - module
720  * - class definition
721  * - the initial global space
722  * - a _global_ declaration (let at global scope or in a module).
723  * Created to exit quickly deeply nested context */
popStrongContext(void)724 static contextType popStrongContext ( void )
725 {
726 	int i;
727 
728 	for (i = stackIndex - 1; i >= 0; --i)
729 	{
730 		if (stack[i].kind == ContextStrong)
731 		{
732 			stackIndex = i;
733 			toDoNext = stack[i].callback;
734 			vStringClear (stack[i].contextName);
735 			return stack[i].type;
736 		}
737 	}
738 	/* ok, no strong context found... */
739 	stackIndex = 0;
740 	toDoNext = &globalScope;
741 	return -1;
742 }
743 
744 /* Reset everything before the last match. */
jumpToMatchContext(void)745 static void jumpToMatchContext ( void )
746 {
747 	int i;
748 	for (i = stackIndex - 1; i >= 0; --i)
749 	{
750 		if (stack[i].type == ContextMatch)
751 		{
752 			stackIndex = i + 1;
753 			toDoNext = stack[i].callback;	// this should always be
754 							// matchPattern
755 			stack[i + 1].callback = NULL;
756 			vStringClear (stack[i + 1].contextName);
757 			return;
758 		}
759 	}
760 }
761 
762 /* Ignore everything till waitedToken and jump to comeAfter.
763  * If the "end" keyword is encountered break, doesn't remember
764  * why though. */
tillToken(vString * const ident CTAGS_ATTR_UNUSED,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)765 static void tillToken (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
766 {
767 	if (what == waitedToken)
768 		toDoNext = comeAfter;
769 	else if (what == OcaKEYWORD_end)
770 	{
771 		popStrongContext ();
772 		toDoNext = &globalScope;
773 	}
774 }
775 
776 /* Ignore everything till a waitedToken is seen, but
777  * take care of balanced parentheses/bracket use */
contextualTillToken(vString * const ident,ocaToken what,ocaToken whatNext)778 static void contextualTillToken (vString * const ident, ocaToken what, ocaToken whatNext)
779 {
780 	static int parentheses = 0;
781 	static int bracket = 0;
782 	static int curly = 0;
783 
784 	switch (what)
785 	{
786 	case Tok_PARL:
787 		parentheses--;
788 		break;
789 	case Tok_PARR:
790 		parentheses++;
791 		break;
792 	case Tok_CurlL:
793 		curly--;
794 		break;
795 	case Tok_CurlR:
796 		curly++;
797 		break;
798 	case Tok_BRL:
799 		bracket--;
800 		break;
801 	case Tok_BRR:
802 		bracket++;
803 		break;
804 
805 	default:	/* other token are ignored */
806 		break;
807 	}
808 
809 	if (what == waitedToken && parentheses == 0 && bracket == 0 && curly == 0)
810 		toDoNext = comeAfter;
811 	else if (what == OcaKEYWORD_end)
812 		globalScope (ident, what, whatNext);
813 }
814 
815 /* Wait for waitedToken and jump to comeAfter or let
816  * the globalScope handle declarations */
tillTokenOrFallback(vString * const ident,ocaToken what,ocaToken whatNext)817 static void tillTokenOrFallback (vString * const ident, ocaToken what, ocaToken whatNext)
818 {
819 	if (what == waitedToken)
820 		toDoNext = comeAfter;
821 	else
822 		globalScope (ident, what, whatNext);
823 }
824 
825 /* ignore token till waitedToken, or give up if find
826  * terminatingToken. Use globalScope to handle new
827  * declarations. */
tillTokenOrTerminatingOrFallback(vString * const ident,ocaToken what,ocaToken whatNext)828 static void tillTokenOrTerminatingOrFallback (vString * const ident, ocaToken what, ocaToken whatNext)
829 {
830 	if (what == waitedToken)
831 		toDoNext = comeAfter;
832 	else if (what == terminatingToken)
833 		toDoNext = globalScope;
834 	else
835 		globalScope (ident, what, whatNext);
836 }
837 
838 /* ignore the next token in the stream and jump to the
839  * given comeAfter state */
ignoreToken(vString * const ident CTAGS_ATTR_UNUSED,ocaToken what CTAGS_ATTR_UNUSED,ocaToken whatNext CTAGS_ATTR_UNUSED)840 static void ignoreToken (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what CTAGS_ATTR_UNUSED, ocaToken whatNext CTAGS_ATTR_UNUSED)
841 {
842 	toDoNext = comeAfter;
843 }
844 
845 /********** Grammar */
846 /* the purpose of each function is detailed near their
847  * implementation */
848 
killCurrentState(void)849 static contextType killCurrentState ( void )
850 {
851 	contextType popped = popStrongContext ();
852 
853 	/* Tracking the kind of previous strong
854 	 * context, if it doesn't match with a
855 	 * really strong entity, repop */
856 	switch (popped)
857 	{
858 	case ContextValue:
859 		popped = popStrongContext ();
860 		break;
861 	case ContextFunction:
862 		popped = popStrongContext ();
863 		break;
864 	case ContextMethod:
865 		popped = popStrongContext ();
866 		break;
867 	case ContextType:
868 		popped = popStrongContext ();
869 		break;
870 	case ContextMatch:
871 		popped = popStrongContext ();
872 		break;
873 	case ContextBlock:
874 		break;
875 	case ContextModule:
876 		break;
877 	case ContextClass:
878 		break;
879 	default:
880 		/* nothing more */
881 		break;
882 	}
883 	return popped;
884 }
885 
886 /* Keep track of our _true_ line number and file pos,
887  * as the lookahead token gives us false values. */
888 static unsigned long ocaLineNumber;
889 static MIOPos ocaFilePosition;
890 
891 /* Used to prepare an OCaml tag, just in case there is a need to
892  * add additional information to the tag. */
prepareTag(tagEntryInfo * tag,vString const * name,int kind)893 static void prepareTag (tagEntryInfo * tag, vString const *name, int kind)
894 {
895 	int parentIndex;
896 
897 	initTagEntry (tag, vStringValue (name), kind);
898 	/* Ripped out of read.h initTagEntry, because of line number
899 	 * shenanigans.
900 	 * Ugh. Lookahead is harder than I expected. */
901 	tag->lineNumber = ocaLineNumber;
902 	tag->filePosition = ocaFilePosition;
903 
904 	parentIndex = getLastNamedIndex ();
905 	if (parentIndex >= 0)
906 	{
907 		tag->extensionFields.scopeKindIndex =
908 			contextDescription (stack[parentIndex].type);
909 		tag->extensionFields.scopeName =
910 			vStringValue (stack[parentIndex].contextName);
911 	}
912 }
913 
914 /* Used to centralise tag creation, and be able to add
915  * more information to it in the future */
addTag(vString * const ident,int kind)916 static void addTag (vString * const ident, int kind)
917 {
918 	if (OcamlKinds [kind].enabled  &&  ident != NULL  &&  vStringLength (ident) > 0)
919 	{
920 		tagEntryInfo toCreate;
921 		prepareTag (&toCreate, ident, kind);
922 		makeTagEntry (&toCreate);
923 	}
924 }
925 
926 static bool needStrongPoping = false;
requestStrongPoping(void)927 static void requestStrongPoping ( void )
928 {
929 	needStrongPoping = true;
930 }
931 
cleanupPreviousParser(void)932 static void cleanupPreviousParser ( void )
933 {
934 	if (needStrongPoping)
935 	{
936 		needStrongPoping = false;
937 		popStrongContext ();
938 	}
939 }
940 
941 /* Due to some circular dependencies, the following functions
942  * must be forward-declared. */
943 static void letParam (vString * const ident, ocaToken what, ocaToken whatNext);
944 static void localScope (vString * const ident, ocaToken what, ocaToken whatNext);
945 static void mayRedeclare (vString * const ident, ocaToken what, ocaToken whatNext);
946 static void typeSpecification (vString * const ident, ocaToken what, ocaToken whatNext);
947 
948 /*
949  * Parse a record type
950  * type ident = // parsed previously
951  *  {
952  *      ident1: type1;
953  *      ident2: type2;
954  *  }
955  */
typeRecord(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)956 static void typeRecord (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
957 {
958 	switch (what)
959 	{
960 	case OcaIDENTIFIER:
961 		addTag (ident, K_RECORDFIELD);
962 		terminatingToken = Tok_CurlR;
963 		waitedToken = Tok_semi;
964 		comeAfter = &typeRecord;
965 		toDoNext = &tillTokenOrTerminatingOrFallback;
966 		break;
967 
968 	case OcaKEYWORD_mutable:
969 		/* ignore it */
970 		break;
971 
972 	case Tok_CurlR:
973 		popStrongContext ();
974 		// don't pop the module context when going to another expression
975 		needStrongPoping = false;
976 		toDoNext = &globalScope;
977 		break;
978 
979 	default:	/* don't care */
980 		break;
981 	}
982 }
983 
984 /* handle :
985  * exception ExceptionName of ... */
exceptionDecl(vString * const ident,ocaToken what,ocaToken whatNext)986 static void exceptionDecl (vString * const ident, ocaToken what, ocaToken whatNext)
987 {
988 	if (what == OcaIDENTIFIER)
989 	{
990 		addTag (ident, K_EXCEPTION);
991 	}
992 	else /* probably ill-formed, give back to global scope */
993 	{
994 		globalScope (ident, what, whatNext);
995 	}
996 	toDoNext = &globalScope;
997 }
998 
999 static tagEntryInfo tempTag;
1000 static vString *tempIdent;
1001 
1002 /* Ensure a constructor is not a type path beginning
1003  * with a module */
constructorValidation(vString * const ident,ocaToken what,ocaToken whatNext)1004 static void constructorValidation (vString * const ident, ocaToken what, ocaToken whatNext)
1005 {
1006 	switch (what)
1007 	{
1008 	case Tok_Op:	/* if we got a '.' which is an operator */
1009 		toDoNext = &globalScope;
1010 		popStrongContext ();
1011 		needStrongPoping = false;
1012 		break;
1013 
1014 	case OcaKEYWORD_of:	/* OK, it must be a constructor :) */
1015 		if (vStringLength (tempIdent) > 0)
1016 		{
1017 			makeTagEntry (&tempTag);
1018 			vStringClear (tempIdent);
1019 		}
1020 		toDoNext = &tillTokenOrFallback;
1021 		comeAfter = &typeSpecification;
1022 		waitedToken = Tok_Pipe;
1023 		break;
1024 
1025 	case Tok_Pipe:	/* OK, it was a constructor :)  */
1026 		if (vStringLength (tempIdent) > 0)
1027 		{
1028 			makeTagEntry (&tempTag);
1029 			vStringClear (tempIdent);
1030 		}
1031 		toDoNext = &typeSpecification;
1032 		break;
1033 
1034 	default:	/* and mean that we're not facing a module name */
1035 		if (vStringLength (tempIdent) > 0)
1036 		{
1037 			makeTagEntry (&tempTag);
1038 			vStringClear (tempIdent);
1039 		}
1040 		toDoNext = &tillTokenOrFallback;
1041 		comeAfter = &typeSpecification;
1042 		waitedToken = Tok_Pipe;
1043 
1044 		popStrongContext ();
1045 
1046 		// don't pop the module context when going to another expression
1047 		needStrongPoping = false;
1048 
1049 		/* to be sure we use this token */
1050 		globalScope (ident, what, whatNext);
1051 	}
1052 }
1053 
1054 /* Parse beginning of type definition
1055  * type 'avar ident =
1056  * or
1057  * type ('var1, 'var2) ident =
1058  */
typeDecl(vString * const ident,ocaToken what,ocaToken whatNext)1059 static void typeDecl (vString * const ident, ocaToken what, ocaToken whatNext)
1060 {
1061 	switch (what)
1062 	{
1063 		/* parameterized */
1064 	case Tok_Prime:
1065 		comeAfter = &typeDecl;
1066 		toDoNext = &ignoreToken;
1067 		break;
1068 		/* LOTS of parameters */
1069 	case Tok_PARL:
1070 		comeAfter = &typeDecl;
1071 		waitedToken = Tok_PARR;
1072 		toDoNext = &tillToken;
1073 		break;
1074 
1075 	case OcaIDENTIFIER:
1076 		addTag (ident, K_TYPE);
1077 		// true type declaration
1078 		if (whatNext == Tok_EQ)
1079 		{
1080 			pushStrongContext (ident, ContextType);
1081 			requestStrongPoping ();
1082 			toDoNext = &typeSpecification;
1083 		}
1084 		else // we're in a sig
1085 			toDoNext = &globalScope;
1086 		break;
1087 
1088 	default:
1089 		globalScope (ident, what, whatNext);
1090 	}
1091 }
1092 
1093 /** handle 'val' signatures in sigs and .mli files
1094   * val ident : String.t -> Val.t
1095   * Eventually, this will do cool things to annotate
1096   * functions with their actual signatures. But for now,
1097   * it's basically globalLet */
val(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1098 static void val (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1099 {
1100 	switch (what)
1101 	{
1102 	case Tok_PARL:
1103 	case OcaKEYWORD_rec:
1104 		break;
1105 
1106 	case Tok_Op:
1107 		/* we are defining a new operator, it's a
1108 		 * function definition */
1109 		addTag (ident, K_VAL);
1110 		toDoNext = &globalScope;
1111 		break;
1112 
1113 	case Tok_Val:	/* Can be a weiiird binding, or an '_' */
1114 	case OcaIDENTIFIER:
1115 		addTag (ident, K_VAL);
1116 		toDoNext = &globalScope;	// sig parser ?
1117 		break;
1118 
1119 	default:
1120 		toDoNext = &globalScope;
1121 		break;
1122 	}
1123 }
1124 
1125 /* Parse type of kind
1126  * type bidule = Ctor1 of ...
1127  *             | Ctor2
1128  *             | Ctor3 of ...
1129  * or
1130  * type bidule = | Ctor1 of ... | Ctor2
1131  *
1132  * when type bidule = { ... } is detected,
1133  * let typeRecord handle it. */
typeSpecification(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1134 static void typeSpecification (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1135 {
1136 	switch (what)
1137 	{
1138 	case OcaIDENTIFIER:
1139 		if (isUpperAlpha (vStringChar (ident, 0)))
1140 		{
1141 			/* here we handle type aliases of type
1142 			 * type foo = AnotherModule.bar
1143 			 * AnotherModule can mistakenly be took
1144 			 * for a constructor. */
1145 			if (! OcamlKinds[K_CONSTRUCTOR].enabled)
1146 				vStringClear (tempIdent);
1147 			else
1148 			{
1149 				vStringCopy (tempIdent, ident);
1150 				prepareTag (&tempTag, tempIdent, K_CONSTRUCTOR);
1151 			}
1152 			toDoNext = &constructorValidation;
1153 		}
1154 		else
1155 		{
1156 			toDoNext = &tillTokenOrFallback;
1157 			comeAfter = &typeSpecification;
1158 			waitedToken = Tok_Pipe;
1159 		}
1160 		break;
1161 
1162 	case OcaKEYWORD_and:
1163 		toDoNext = &typeDecl;
1164 		break;
1165 
1166 	case OcaKEYWORD_val:
1167 		toDoNext = &val;
1168 		break;
1169 
1170 	case Tok_BRL:	/* the '[' & ']' are ignored to accommodate */
1171 	case Tok_BRR:	/* with the revised syntax */
1172 	case Tok_Pipe:
1173 		/* just ignore it */
1174 		break;
1175 
1176 	case Tok_CurlL:
1177 		toDoNext = &typeRecord;
1178 		break;
1179 
1180 	default:	/* don't care */
1181 		break;
1182 	}
1183 }
1184 
1185 
1186 static bool dirtySpecialParam = false;
1187 
1188 /* parse the ~label and ~label:type parameter */
parseLabel(vString * const ident,ocaToken what,ocaToken whatNext)1189 static void parseLabel (vString * const ident, ocaToken what, ocaToken whatNext)
1190 {
1191 	static int parCount = 0;
1192 
1193 	switch (what)
1194 	{
1195 	case OcaIDENTIFIER:
1196 		if (!dirtySpecialParam)
1197 		{
1198 			if (exportLocalInfo)
1199 				addTag (ident, K_VARIABLE);
1200 
1201 			dirtySpecialParam = true;
1202 		}
1203 		break;
1204 
1205 	case Tok_PARL:
1206 		parCount++;
1207 		break;
1208 
1209 	case Tok_PARR:
1210 		parCount--;
1211 		if (parCount == 0)
1212 			toDoNext = &letParam;
1213 		break;
1214 
1215 	case Tok_Op:
1216 		if (vStringChar(ident, 0) == ':')
1217 		{
1218 			toDoNext = &ignoreToken;
1219 			comeAfter = &letParam;
1220 		}
1221 		else if (parCount == 0 && dirtySpecialParam)
1222 		{
1223 			toDoNext = &letParam;
1224 			letParam (ident, what, whatNext);
1225 		}
1226 		break;
1227 
1228 	default:
1229 		if (parCount == 0 && dirtySpecialParam)
1230 		{
1231 			toDoNext = &letParam;
1232 			letParam (ident, what, whatNext);
1233 		}
1234 		break;
1235 	}
1236 }
1237 
1238 /* Optional argument with syntax like this :
1239  * ?(foo = value) */
parseOptionnal(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1240 static void parseOptionnal (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1241 {
1242 	static int parCount = 0;
1243 
1244 	switch (what)
1245 	{
1246 	case OcaIDENTIFIER:
1247 		if (!dirtySpecialParam)
1248 		{
1249 			if (exportLocalInfo)
1250 				addTag (ident, K_VARIABLE);
1251 
1252 			dirtySpecialParam = true;
1253 
1254 			if (parCount == 0)
1255 				toDoNext = &letParam;
1256 		}
1257 		break;
1258 
1259 	case Tok_PARL:
1260 		parCount++;
1261 		break;
1262 
1263 	case Tok_PARR:
1264 		parCount--;
1265 		if (parCount == 0)
1266 			toDoNext = &letParam;
1267 		break;
1268 
1269 	default:	/* don't care */
1270 		break;
1271 	}
1272 }
1273 
1274 /** handle let inside functions (so like it's name
1275  * say : local let */
localLet(vString * const ident,ocaToken what,ocaToken whatNext)1276 static void localLet (vString * const ident, ocaToken what, ocaToken whatNext)
1277 {
1278 	switch (what)
1279 	{
1280 	case Tok_PARL:
1281 		/* We ignore this token to be able to parse such
1282 		 * declarations :
1283 		 * let (ident : type) = ...
1284 		 */
1285 		break;
1286 
1287 	case OcaKEYWORD_rec:
1288 		/* just ignore to be able to parse such declarations:
1289 		 * let rec ident = ... */
1290 		break;
1291 
1292 	case Tok_Op:
1293 		/* we are defining a new operator, it's a
1294 		 * function definition */
1295 		if (exportLocalInfo)
1296 			addTag (ident, K_FUNCTION);
1297 		pushSoftContext (mayRedeclare, ident, ContextFunction);
1298 		toDoNext = &letParam;
1299 		break;
1300 
1301 	case Tok_Val:	/* Can be a weiiird binding, or an '_' */
1302 	case OcaIDENTIFIER:
1303 		// if we're an identifier, and the next token is too, then
1304 		// we're definitely a function.
1305 		if (whatNext == OcaIDENTIFIER || whatNext == Tok_PARL)
1306 		{
1307 			if (exportLocalInfo)
1308 				addTag (ident, K_FUNCTION);
1309 			pushSoftContext (mayRedeclare, ident, ContextFunction);
1310 		}
1311 		else
1312 		{
1313 			if (exportLocalInfo)
1314 				addTag (ident, K_VARIABLE);
1315 			pushSoftContext (mayRedeclare, ident, ContextValue);
1316 		}
1317 		toDoNext = &letParam;
1318 		break;
1319 
1320 	case OcaKEYWORD_end:
1321 		localScope (ident, what, whatNext);
1322 		break;
1323 
1324 	default:
1325 		toDoNext = &localScope;
1326 		break;
1327 	}
1328 }
1329 
1330 /* parse :
1331  * | pattern pattern -> ...
1332  * or
1333  * pattern apttern apttern -> ...
1334  * we ignore all identifiers declared in the pattern,
1335  * because their scope is likely to be even more limited
1336  * than the let definitions.
1337  * Used after a match ... with, or a function ...
1338  * because their syntax is similar.  */
matchPattern(vString * const ident,ocaToken what,ocaToken whatNext)1339 static void matchPattern (vString * const ident, ocaToken what, ocaToken whatNext)
1340 {
1341 	/* keep track of [], as it
1342 	 * can be used in patterns and can
1343 	 * mean the end of match expression in
1344 	 * revised syntax */
1345 	static int braceCount = 0;
1346 
1347 	switch (what)
1348 	{
1349 	case Tok_To:
1350 		pushEmptyContext (&matchPattern);
1351 		toDoNext = &mayRedeclare;
1352 		break;
1353 
1354 	case Tok_BRL:
1355 	braceCount++;
1356 	break;
1357 
1358 	case OcaKEYWORD_value:
1359 		popLastNamed ();
1360 	case OcaKEYWORD_and:
1361 	case OcaKEYWORD_end:
1362 		// why was this global? matches only make sense in local scope
1363 		localScope (ident, what, whatNext);
1364 		break;
1365 
1366 	case OcaKEYWORD_in:
1367 		popLastNamed ();
1368 		break;
1369 
1370 	default:
1371 		break;
1372 	}
1373 }
1374 
1375 /* Used at the beginning of a new scope (begin of a
1376  * definition, parenthesis...) to catch inner let
1377  * definition that may be in. */
mayRedeclare(vString * const ident,ocaToken what,ocaToken whatNext)1378 static void mayRedeclare (vString * const ident, ocaToken what, ocaToken whatNext)
1379 {
1380 	switch (what)
1381 	{
1382 	case OcaKEYWORD_value:
1383 	/* let globalScope handle it */
1384 	globalScope (ident, what, whatNext);
1385 
1386 	case OcaKEYWORD_let:
1387 		toDoNext = &localLet;
1388 		break;
1389 
1390 	case OcaKEYWORD_val:
1391 		toDoNext = &val;
1392 		break;
1393 
1394 	case OcaKEYWORD_object:
1395 		vStringClear (lastClass);
1396 		pushContext (ContextStrong, ContextClass,
1397 			&localScope, NULL);
1398 		needStrongPoping = false;
1399 		toDoNext = &globalScope;
1400 		break;
1401 
1402 	case OcaKEYWORD_for:
1403 	case OcaKEYWORD_while:
1404 		toDoNext = &tillToken;
1405 		waitedToken = OcaKEYWORD_do;
1406 		comeAfter = &mayRedeclare;
1407 		break;
1408 
1409 	case OcaKEYWORD_try:
1410 		toDoNext = &mayRedeclare;
1411 		pushSoftContext (&matchPattern, ident, ContextFunction);
1412 		break;
1413 
1414 	case OcaKEYWORD_function:
1415 		toDoNext = &matchPattern;
1416 		pushSoftContext (&matchPattern, NULL, ContextMatch);
1417 		break;
1418 
1419 	case OcaKEYWORD_fun:
1420 		toDoNext = &letParam;
1421 		break;
1422 
1423 		/* Handle the special ;; from the OCaml
1424 		 * Top level */
1425 	case Tok_semi:
1426 	default:
1427 		toDoNext = &localScope;
1428 		localScope (ident, what, whatNext);
1429 	}
1430 }
1431 
1432 /* parse :
1433  * p1 p2 ... pn = ...
1434  * or
1435  * ?(p1=v) p2 ~p3 ~pn:ja ... = ... */
letParam(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1436 static void letParam (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1437 {
1438 	switch (what)
1439 	{
1440 	case Tok_To:
1441 	case Tok_EQ:
1442 		toDoNext = &mayRedeclare;
1443 		break;
1444 
1445 	case OcaIDENTIFIER:
1446 		if (exportLocalInfo)
1447 			addTag (ident, K_VARIABLE);
1448 		break;
1449 
1450 	case Tok_Op:
1451 		switch (vStringChar (ident, 0))
1452 		{
1453 		case ':':
1454 			/*popSoftContext(); */
1455 			/* we got a type signature */
1456 			comeAfter = &mayRedeclare;
1457 			toDoNext = &tillTokenOrFallback;
1458 			waitedToken = Tok_EQ;
1459 			break;
1460 
1461 			/* parse something like
1462 			 * ~varname:type
1463 			 * or
1464 			 * ~varname
1465 			 * or
1466 			 * ~(varname: long type) */
1467 		case '~':
1468 			toDoNext = &parseLabel;
1469 			dirtySpecialParam = false;
1470 			break;
1471 
1472 			/* Optional argument with syntax like this :
1473 			 * ?(bla = value)
1474 			 * or
1475 			 * ?bla */
1476 		case '?':
1477 			toDoNext = &parseOptionnal;
1478 			dirtySpecialParam = false;
1479 			break;
1480 
1481 		default:
1482 			break;
1483 		}
1484 		break;
1485 
1486 	default:	/* don't care */
1487 		break;
1488 	}
1489 }
1490 
1491 /* parse object ...
1492  * used to be sure the class definition is not a type
1493  * alias */
classSpecif(vString * const ident CTAGS_ATTR_UNUSED,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1494 static void classSpecif (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1495 {
1496 	switch (what)
1497 	{
1498 	case OcaKEYWORD_object:
1499 		pushStrongContext (lastClass, ContextClass);
1500 		toDoNext = &globalScope;
1501 		break;
1502 
1503 	default:
1504 		vStringClear (lastClass);
1505 		toDoNext = &globalScope;
1506 	}
1507 }
1508 
1509 /* Handle a method ... class declaration.
1510  * nearly a copy/paste of globalLet. */
methodDecl(vString * const ident,ocaToken what,ocaToken whatNext)1511 static void methodDecl (vString * const ident, ocaToken what, ocaToken whatNext)
1512 {
1513 	switch (what)
1514 	{
1515 	case Tok_PARL:
1516 		/* We ignore this token to be able to parse such
1517 		 * declarations :
1518 		 * let (ident : type) = ...  */
1519 		break;
1520 
1521 	case OcaKEYWORD_mutable:
1522 	case OcaKEYWORD_virtual:
1523 	case OcaKEYWORD_rec:
1524 		/* just ignore to be able to parse such declarations:
1525 		 * let rec ident = ... */
1526 		break;
1527 
1528 	case OcaIDENTIFIER:
1529 		addTag (ident, K_METHOD);
1530 		/* Normal pushing to get good subs */
1531 		pushStrongContext (ident, ContextMethod);
1532 		/*pushSoftContext( globalScope, ident, ContextMethod ); */
1533 		toDoNext = &letParam;
1534 		break;
1535 
1536 	case OcaKEYWORD_end:
1537 		localScope (ident, what, whatNext);
1538 		break;
1539 
1540 	default:
1541 		toDoNext = &globalScope;
1542 		break;
1543 	}
1544 }
1545 
1546 /* name of the last module, used for
1547  * context stacking. */
1548 static vString *lastModule;
1549 
1550 /* parse
1551  * ... struct (* new global scope *) end
1552  * or
1553  * ... sig (* new global scope *) end
1554  * or
1555  * functor ... -> moduleSpecif
1556  */
moduleSpecif(vString * const ident,ocaToken what,ocaToken whatNext)1557 static void moduleSpecif (vString * const ident, ocaToken what, ocaToken whatNext)
1558 {
1559 	switch (what)
1560 	{
1561 	case OcaKEYWORD_functor:
1562 		toDoNext = &contextualTillToken;
1563 		waitedToken = Tok_To;
1564 		comeAfter = &moduleSpecif;
1565 		break;
1566 
1567 	case OcaKEYWORD_struct:
1568 	case OcaKEYWORD_sig:
1569 		pushStrongContext (lastModule, ContextModule);
1570 		toDoNext = &globalScope;
1571 		needStrongPoping = false;
1572 		break;
1573 
1574 	case Tok_PARL:	/* ( */
1575 		toDoNext = &contextualTillToken;
1576 		comeAfter = &globalScope;
1577 		waitedToken = Tok_PARR;
1578 		contextualTillToken (ident, what, whatNext);
1579 		break;
1580 
1581 	case Tok_Of:
1582 	case Tok_EQ:
1583 		break;
1584 
1585 	default:
1586 		vStringClear (lastModule);
1587 		toDoNext = &globalScope;
1588 		break;
1589 	}
1590 }
1591 
1592 /* parse :
1593  * module name = ...
1594  * then pass the token stream to moduleSpecif */
moduleDecl(vString * const ident,ocaToken what,ocaToken whatNext)1595 static void moduleDecl (vString * const ident, ocaToken what, ocaToken whatNext)
1596 {
1597 	switch (what)
1598 	{
1599 	case OcaKEYWORD_rec:
1600 		/* recursive modules are _weird_, but they happen */
1601 	case OcaKEYWORD_type:
1602 		/* this is technically a special type, but whatever */
1603 		break;
1604 
1605 	case OcaIDENTIFIER:
1606 		addTag (ident, K_MODULE);
1607 		vStringCopy (lastModule, ident);
1608 		if (whatNext == Tok_Of || whatNext == Tok_EQ)
1609 			toDoNext = &moduleSpecif;
1610 		else
1611 		{
1612 			// default to waiting on a '=' since
1613 			// module M : sig ... end = struct ... end
1614 			// is rarer
1615 			waitedToken = Tok_EQ;
1616 			comeAfter = &moduleSpecif;
1617 			toDoNext = &contextualTillToken;
1618 		}
1619 		break;
1620 
1621 	default:	/* don't care */
1622 		break;
1623 	}
1624 }
1625 
1626 /* parse :
1627  * class name = ...
1628  * or
1629  * class virtual ['a,'b] classname = ... */
classDecl(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1630 static void classDecl (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1631 {
1632 	switch (what)
1633 	{
1634 	case OcaIDENTIFIER:
1635 		addTag (ident, K_CLASS);
1636 		vStringCopy (lastClass, ident);
1637 		toDoNext = &contextualTillToken;
1638 		waitedToken = Tok_EQ;
1639 		comeAfter = &classSpecif;
1640 		break;
1641 
1642 	case Tok_BRL:
1643 		toDoNext = &tillToken;
1644 		waitedToken = Tok_BRR;
1645 		comeAfter = &classDecl;
1646 		break;
1647 
1648 	default:
1649 		break;
1650 	}
1651 }
1652 
1653 /* Handle a global
1654  * let ident ...
1655  * or
1656  * let rec ident ... */
globalLet(vString * const ident,ocaToken what,ocaToken whatNext)1657 static void globalLet (vString * const ident, ocaToken what, ocaToken whatNext)
1658 {
1659 	switch (what)
1660 	{
1661 	case Tok_PARL:
1662 		/* We ignore this token to be able to parse such
1663 		 * declarations :
1664 		 * let (ident : type) = ...
1665 		 * but () is the toplevel function name, so fake ourselves
1666 		 * as an ident and make a new function */
1667 		if (whatNext == Tok_PARR)
1668 		{
1669 			vString *fakeIdent = vStringNewInit ("()");
1670 			addTag (fakeIdent, K_FUNCTION);
1671 			pushStrongContext (fakeIdent, ContextFunction);
1672 			vStringDelete (fakeIdent);
1673 			requestStrongPoping ();
1674 			toDoNext = &letParam;
1675 		}
1676 		break;
1677 
1678 	case OcaKEYWORD_mutable:
1679 	case OcaKEYWORD_virtual:
1680 	case OcaKEYWORD_rec:
1681 		/* just ignore to be able to parse such declarations:
1682 		 * let rec ident = ... */
1683 		break;
1684 
1685 	case Tok_Op:
1686 		/* we are defining a new operator, it's a
1687 		 * function definition */
1688 		addTag (ident, K_FUNCTION);
1689 		pushStrongContext (ident, ContextFunction);
1690 		toDoNext = &letParam;
1691 		break;
1692 
1693 	case Tok_Val:
1694 		if (vStringValue (ident)[0] == '_')
1695 			addTag (ident, K_FUNCTION);
1696 		pushStrongContext (ident, ContextFunction);
1697 		requestStrongPoping ();
1698 		toDoNext = &letParam;
1699 		break;
1700 
1701 	case OcaIDENTIFIER:
1702 		// if we're an identifier, and the next token is too, then
1703 		// we're definitely a function.
1704 		if (whatNext == OcaIDENTIFIER || whatNext == Tok_PARL)
1705 		{
1706 			addTag (ident, K_FUNCTION);
1707 			pushStrongContext (ident, ContextFunction);
1708 		}
1709 		else
1710 		{
1711 			addTag (ident, K_VARIABLE);
1712 			pushStrongContext (ident, ContextValue);
1713 		}
1714 		requestStrongPoping ();
1715 		toDoNext = &letParam;
1716 		break;
1717 
1718 	case OcaKEYWORD_end:
1719 		globalScope (ident, what, whatNext);
1720 		break;
1721 
1722 	default:
1723 		toDoNext = &globalScope;
1724 		break;
1725 	}
1726 }
1727 
1728 /* Handle the "strong" top levels, all 'big' declarations
1729  * happen here */
globalScope(vString * const ident CTAGS_ATTR_UNUSED,ocaToken what,ocaToken whatNext)1730 static void globalScope (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what, ocaToken whatNext)
1731 {
1732 	/* Do not touch, this is used only by the global scope
1733 	 * to handle an 'and' */
1734 	static parseNext previousParser = &globalScope;
1735 
1736 	switch (what)
1737 	{
1738 	case OcaKEYWORD_and:
1739 		cleanupPreviousParser ();
1740 		// deal with module M = struct ... end _and_ N = struct ... end
1741 		toDoNext = previousParser;
1742 		break;
1743 
1744 	case OcaKEYWORD_type:
1745 		cleanupPreviousParser ();
1746 		toDoNext = &typeDecl;
1747 		previousParser = &typeDecl;
1748 		break;
1749 
1750 	case OcaKEYWORD_class:
1751 		cleanupPreviousParser ();
1752 		toDoNext = &classDecl;
1753 		previousParser = &classDecl;
1754 		break;
1755 
1756 	case OcaKEYWORD_module:
1757 		cleanupPreviousParser ();
1758 		toDoNext = &moduleDecl;
1759 		previousParser = &moduleDecl;
1760 		break;
1761 
1762 	case OcaKEYWORD_end:;
1763 		contextType popped = killCurrentState ();
1764 
1765 		/** so here, end can legally be followed by = or and in the
1766 		 * situation of
1767 		 * module M : sig ... end = struct ... end  and
1768 		 * module M struct ... end and N = struct ... end
1769 		 * and we need to make sure we know we're still inside of a
1770 		 * struct */
1771 		if (whatNext == Tok_EQ && popped == ContextModule)
1772 		{
1773 			previousParser = &moduleDecl;
1774 			toDoNext = &moduleSpecif;
1775 		}
1776 		else if (whatNext == OcaKEYWORD_and && popped == ContextModule)
1777 			toDoNext = &moduleDecl;
1778 		needStrongPoping = false;
1779 		break;
1780 
1781 	case OcaKEYWORD_method:
1782 		cleanupPreviousParser ();
1783 		toDoNext = &methodDecl;
1784 		/* and is not allowed in methods */
1785 		break;
1786 
1787 	case OcaKEYWORD_val:
1788 		toDoNext = &val;
1789 		/* and is not allowed in sigs */
1790 		break;
1791 
1792 	case OcaKEYWORD_let:
1793 		cleanupPreviousParser ();
1794 		toDoNext = &globalLet;
1795 		previousParser = &globalLet;
1796 		break;
1797 
1798 	case OcaKEYWORD_exception:
1799 		cleanupPreviousParser ();
1800 		toDoNext = &exceptionDecl;
1801 		previousParser = &globalScope;
1802 		break;
1803 
1804 		/* must be a #line directive, discard the
1805 		 * whole line. */
1806 	case Tok_Sharp:
1807 		/* ignore */
1808 		break;
1809 
1810 	default:
1811 		/* we don't care */
1812 		break;
1813 	}
1814 }
1815 
1816 /* Parse expression. Well ignore it is more the case,
1817  * ignore all tokens except "shocking" keywords */
localScope(vString * const ident,ocaToken what,ocaToken whatNext)1818 static void localScope (vString * const ident, ocaToken what, ocaToken whatNext)
1819 {
1820 	switch (what)
1821 	{
1822 
1823 		// we're probably in a match, so let's go to the last one
1824 	case Tok_Pipe:
1825 		jumpToMatchContext ();
1826 		break;
1827 
1828 	case Tok_PARR:
1829 	case Tok_BRR:
1830 	case Tok_CurlR:
1831 		popSoftContext ();
1832 		break;
1833 
1834 		/* Everything that `begin` has an `end`
1835 		 * as end is overloaded and signal many end
1836 		 * of things, we add an empty strong context to
1837 		 * avoid problem with the end.
1838 		 */
1839 	case OcaKEYWORD_begin:
1840 		pushContext (ContextStrong, ContextBlock, &mayRedeclare, NULL);
1841 		toDoNext = &mayRedeclare;
1842 		break;
1843 
1844 		/* An in keyword signals the end of the previous context and the
1845 		 * start of a new one. */
1846 	case OcaKEYWORD_in:
1847 		popLastNamed ();
1848 		pushEmptyContext (&localScope);
1849 		toDoNext = &mayRedeclare;
1850 		break;
1851 
1852 		/* Ok, we got a '{', which is much likely to create
1853 		 * a record. We cannot treat it like other [ && (,
1854 		 * because it may contain the 'with' keyword and screw
1855 		 * everything else. */
1856 	case Tok_CurlL:
1857 		toDoNext = &contextualTillToken;
1858 		waitedToken = Tok_CurlR;
1859 		comeAfter = &localScope;
1860 		contextualTillToken (ident, what, whatNext);
1861 		break;
1862 
1863 		/* Yeah imperative feature of OCaml,
1864 		 * a ';' like in C */
1865 	case Tok_semi:
1866 		/* ';;' case should end all scopes */
1867 		if (whatNext == Tok_semi)
1868 		{
1869 			popStrongContext ();
1870 			toDoNext = &globalScope;
1871 			break;
1872 		}	/* else fallthrough */
1873 
1874 		/* Every standard operator has very high precedence
1875 		 * e.g. expr * expr needs no parentheses */
1876 	case Tok_Op:
1877 		toDoNext = &mayRedeclare;
1878 		break;
1879 
1880 	case Tok_PARL:
1881 	case Tok_BRL:
1882 		pushEmptyContext (&localScope);
1883 		toDoNext = &mayRedeclare;
1884 		break;
1885 
1886 	case OcaKEYWORD_and:
1887 		if (toDoNext == &mayRedeclare)
1888 		{
1889 			popSoftContext ();
1890 			pushEmptyContext (localScope);
1891 			toDoNext = &localLet;
1892 		}
1893 		else
1894 		{
1895 			/* a local 'and' keyword jumps up a context to the last
1896 			 * named. For ex
1897 			 * in `with let IDENT ... and IDENT2 ...` ident and
1898 			 * ident2 are on
1899 			 * same level, the same as `let IDENT ... in let IDENT2
1900 			 * ...`
1901 			 * a 'let' is the only 'and'-chainable construct allowed
1902 			 * locally
1903 			 * (thus we had to be one to get here), so we either go
1904 			 * to
1905 			 * globalLet or localLet depending on our scope. */
1906 			popLastNamed ();
1907 			toDoNext = stackIndex == 0 ? &globalLet : &localLet;
1908 		}
1909 		break;
1910 
1911 	case OcaKEYWORD_else:
1912 	case OcaKEYWORD_then:
1913 		popSoftContext ();
1914 		pushEmptyContext (&localScope);
1915 		toDoNext = &mayRedeclare;
1916 		break;
1917 
1918 	case OcaKEYWORD_if:
1919 		pushEmptyContext (&localScope);
1920 		toDoNext = &mayRedeclare;
1921 		break;
1922 
1923 	case OcaKEYWORD_match:
1924 		pushEmptyContext (&localScope);
1925 		toDoNext = &mayRedeclare;
1926 		break;
1927 
1928 	case OcaKEYWORD_with:
1929 		popSoftContext ();
1930 		toDoNext = &matchPattern;
1931 		pushSoftContext (&matchPattern, NULL, ContextMatch);
1932 		break;
1933 
1934 	case OcaKEYWORD_fun:
1935 		toDoNext = &letParam;
1936 		break;
1937 
1938 	case OcaKEYWORD_done:
1939 		/* doesn't care */
1940 		break;
1941 
1942 	default:
1943 		requestStrongPoping ();
1944 		globalScope (ident, what, whatNext);
1945 		break;
1946 	}
1947 }
1948 
1949 /*////////////////////////////////////////////////////////////////
1950 //// Deal with the system                                       */
1951 /* in OCaml the file name is the module name used in the language
1952  * with it first letter put in upper case */
computeModuleName(void)1953 static void computeModuleName ( void )
1954 {
1955 	/* in OCaml the file name define a module.
1956 	 * so we define a module if the file has
1957 	 * things in it. =)
1958 	 */
1959 	const char *filename = getInputFileName ();
1960 
1961 	int beginIndex = 0;
1962 	int endIndex = strlen (filename) - 1;
1963 	vString *moduleName = vStringNew ();
1964 
1965 	while (filename[endIndex] != '.' && endIndex > 0)
1966 		endIndex--;
1967 
1968 	/* avoid problem with path in front of filename */
1969 	beginIndex = endIndex;
1970 	while (beginIndex > 0)
1971 	{
1972 		if (filename[beginIndex] == '\\' || filename[beginIndex] == '/')
1973 		{
1974 			beginIndex++;
1975 			break;
1976 		}
1977 
1978 		beginIndex--;
1979 	}
1980 
1981 	vStringNCopyS (moduleName, &filename[beginIndex], endIndex - beginIndex);
1982 
1983 	if (isLowerAlpha (vStringChar (moduleName, 0)))
1984 		vStringChar (moduleName, 0) += ('A' - 'a');
1985 
1986 	addTag (moduleName, K_MODULE);
1987 	vStringDelete (moduleName);
1988 }
1989 
1990 /* Allocate all string of the context stack */
initStack(void)1991 static void initStack ( void )
1992 {
1993 	int i;
1994 	for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i)
1995 		stack[i].contextName = vStringNew ();
1996 	stackIndex = 0;
1997 }
1998 
clearStack(void)1999 static void clearStack ( void )
2000 {
2001 	int i;
2002 	for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i)
2003 		vStringDelete (stack[i].contextName);
2004 }
2005 
findOcamlTags(void)2006 static void findOcamlTags (void)
2007 {
2008 	lexingState st;
2009 	ocaToken tok;
2010 
2011 	/* One-token lookahead gives us the ability to
2012 	 * do much more accurate analysis */
2013 	lexingState nextSt;
2014 	ocaToken nextTok;
2015 
2016 	initStack ();
2017 
2018 	tempIdent = vStringNew ();
2019 	lastModule = vStringNew ();
2020 	lastClass = vStringNew ();
2021 	vString *temp_cp = vStringNew ();
2022 
2023 	nextSt.name = vStringNew ();
2024 	nextSt.cp = readLineFromInputFile ();
2025 	ocaLineNumber = getInputLineNumber();
2026 	ocaFilePosition = getInputFilePosition();
2027 	toDoNext = &globalScope;
2028 	nextTok = lex (&nextSt);
2029 
2030 	if (nextTok != Tok_EOF)
2031 		computeModuleName ();
2032 
2033 	/* prime the lookahead token */
2034 	st = nextSt;	// preserve the old state for our first token
2035 	st.name = vStringNewCopy (st.name);
2036 	st.cp = (const unsigned char *) vStringValue (temp_cp);
2037 	tok = nextTok;
2038 	ocaLineNumber = getInputLineNumber(); /* ??? getSourceLineNumber() */
2039 	ocaFilePosition = getInputFilePosition();
2040 	nextTok = lex (&nextSt);
2041 
2042 	/* main loop */
2043 	while (tok != Tok_EOF)
2044 	{
2045 		(*toDoNext) (st.name, tok, nextTok);
2046 
2047 		tok = nextTok;
2048 		ocaLineNumber = getInputLineNumber(); /* ??? */
2049 		ocaFilePosition = getInputFilePosition();
2050 
2051 		if (nextTok != Tok_EOF)
2052 		{
2053 			vStringCopyS (temp_cp, (const char *) nextSt.cp);
2054 			st.cp = (const unsigned char *) vStringValue (temp_cp);
2055 			vStringCopy (st.name, nextSt.name);
2056 			nextTok = lex (&nextSt);
2057 		}
2058 		else
2059 			break;
2060 	}
2061 
2062 	vStringDelete (st.name);
2063 	vStringDelete (nextSt.name);
2064 	vStringDelete (temp_cp);
2065 	vStringDelete (tempIdent);
2066 	vStringDelete (lastModule);
2067 	vStringDelete (lastClass);
2068 	clearStack ();
2069 }
2070 
ocamlInitialize(const langType language)2071 static void ocamlInitialize (const langType language)
2072 {
2073 	Lang_Ocaml = language;
2074 
2075 	initOperatorTable ();
2076 }
2077 
OcamlParser(void)2078 extern parserDefinition *OcamlParser (void)
2079 {
2080 	static const char *const extensions[] = { "ml", "mli", "aug", NULL };
2081 	static const char *const aliases[] = { "tuareg", /* mode name of emacs */
2082 										   "caml",	 /* mode name of emacs */
2083 										   NULL };
2084 	parserDefinition *def = parserNew ("OCaml");
2085 	def->kindTable = OcamlKinds;
2086 	def->kindCount = ARRAY_SIZE (OcamlKinds);
2087 	def->extensions = extensions;
2088 	def->aliases = aliases;
2089 	def->parser = findOcamlTags;
2090 	def->initialize = ocamlInitialize;
2091 	def->keywordTable = OcamlKeywordTable;
2092 	def->keywordCount = ARRAY_SIZE (OcamlKeywordTable);
2093 	return def;
2094 }
2095