1 /*-
2  * Copyright (c) 2006 Verdens Gang AS
3  * Copyright (c) 2006-2011 Varnish Software AS
4  * All rights reserved.
5  *
6  * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7  *
8  * SPDX-License-Identifier: BSD-2-Clause
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include "config.h"
33 
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #include "vcc_compile.h"
38 
39 #include "venc.h"
40 #include "vct.h"
41 #include "vsb.h"
42 
43 /*--------------------------------------------------------------------*/
44 
45 void
vcc_ErrToken(const struct vcc * tl,const struct token * t)46 vcc_ErrToken(const struct vcc *tl, const struct token *t)
47 {
48 
49 	if (t->tok == EOI)
50 		VSB_cat(tl->sb, "end of input");
51 	else if (t->tok == CSRC)
52 		VSB_cat(tl->sb, "C{ ... }C");
53 	else
54 		VSB_printf(tl->sb, "'%.*s'", PF(t));
55 }
56 
57 void
vcc__ErrInternal(struct vcc * tl,const char * func,unsigned line)58 vcc__ErrInternal(struct vcc *tl, const char *func, unsigned line)
59 {
60 
61 	VSB_printf(tl->sb, "VCL compiler internal error at %s():%u\n",
62 	    func, line);
63 	tl->err = 1;
64 }
65 
66 /*--------------------------------------------------------------------
67  * Find start of source-line of token
68  */
69 
70 static void
vcc_iline(const struct token * t,const char ** ll,int tail)71 vcc_iline(const struct token *t, const char **ll, int tail)
72 {
73 	const char *p, *b, *x;
74 
75 	b = t->src->b;
76 	if (ll != NULL)
77 		*ll = b;
78 	x = tail ? t->e - 1 : t->b;
79 	for (p = b; p < x; p++) {
80 		if (*p == '\n') {
81 			if (ll != NULL)
82 				*ll = p + 1;
83 		}
84 	}
85 }
86 
87 /*--------------------------------------------------------------------
88  * Find and print src+line+pos of this token
89  */
90 
91 static void
vcc_icoord(struct vsb * vsb,const struct token * t,int tail)92 vcc_icoord(struct vsb *vsb, const struct token *t, int tail)
93 {
94 	unsigned lin, pos;
95 	const char *p, *b, *x;
96 
97 	lin = 1;
98 	pos = 0;
99 	b = t->src->b;
100 	x = tail ? t->e - 1 : t->b;
101 	for (p = b; p < x; p++) {
102 		if (*p == '\n') {
103 			lin++;
104 			pos = 0;
105 		} else if (*p == '\t') {
106 			pos &= ~7;
107 			pos += 8;
108 		} else
109 			pos++;
110 	}
111 	VSB_cat(vsb, "(");
112 	if (tail < 2)
113 		VSB_printf(vsb, "'%s' Line %u ", t->src->name, lin);
114 	VSB_printf(vsb, "Pos %u)", pos + 1);
115 }
116 
117 /*--------------------------------------------------------------------*/
118 
119 void
vcc_Coord(const struct vcc * tl,struct vsb * vsb,const struct token * t)120 vcc_Coord(const struct vcc *tl, struct vsb *vsb, const struct token *t)
121 {
122 
123 	if (t == NULL)
124 		t = tl->t;
125 	vcc_icoord(vsb, t, 0);
126 }
127 
128 /*--------------------------------------------------------------------
129  * Output one line of source code, starting at 'l' and ending at the
130  * first NL or 'le'.
131  */
132 
133 static void
vcc_quoteline(const struct vcc * tl,const char * l,const char * le)134 vcc_quoteline(const struct vcc *tl, const char *l, const char *le)
135 {
136 	const char *p;
137 	unsigned x, y;
138 
139 	x = y = 0;
140 	for (p = l; p < le && *p != '\n'; p++) {
141 		if (*p == '\t') {
142 			y &= ~7;
143 			y += 8;
144 			while (x < y) {
145 				VSB_putc(tl->sb, ' ');
146 				x++;
147 			}
148 		} else {
149 			x++;
150 			y++;
151 			VSB_putc(tl->sb, *p);
152 		}
153 	}
154 	VSB_putc(tl->sb, '\n');
155 }
156 
157 /*--------------------------------------------------------------------
158  * Output a marker line for a sourceline starting at 'l' and ending at
159  * the first NL or 'le'.  Characters between 'b' and 'e' are marked.
160  */
161 
162 static void
vcc_markline(const struct vcc * tl,const char * l,const char * le,const char * b,const char * e)163 vcc_markline(const struct vcc *tl, const char *l, const char *le,
164     const char *b, const char *e)
165 {
166 	const char *p;
167 	unsigned x, y;
168 	char c;
169 
170 	x = y = 0;
171 	for (p = l; p < le && *p != '\n'; p++) {
172 		if (p >= b && p < e)
173 			c = '#';
174 		else
175 			c = '-';
176 
177 		if (*p == '\t') {
178 			y &= ~7;
179 			y += 8;
180 		} else
181 			y++;
182 		while (x < y) {
183 			VSB_putc(tl->sb, c);
184 			x++;
185 		}
186 	}
187 	VSB_putc(tl->sb, '\n');
188 }
189 
190 void
vcc_Warn(struct vcc * tl)191 vcc_Warn(struct vcc *tl)
192 {
193 
194 	AN(tl);
195 	AN(tl->err);
196 	VSB_cat(tl->sb, "(That was just a warning)\n");
197 	tl->err = 0;
198 }
199 
200 /*--------------------------------------------------------------------*/
201 /* XXX: should take first+last token */
202 
203 void
vcc_ErrWhere2(struct vcc * tl,const struct token * t,const struct token * t2)204 vcc_ErrWhere2(struct vcc *tl, const struct token *t, const struct token *t2)
205 {
206 	const char  *l1, *l2, *l3;
207 
208 	if (t == NULL) {
209 		vcc_ErrWhere(tl, t2);
210 		return;
211 	}
212 	vcc_iline(t, &l1, 0);
213 	t2 = VTAILQ_PREV(t2, tokenhead, list);
214 	vcc_iline(t2, &l2, 1);
215 
216 
217 	if (l1 == l2) {
218 		vcc_icoord(tl->sb, t, 0);
219 		VSB_cat(tl->sb, " -- ");
220 		vcc_icoord(tl->sb, t2, 2);
221 		VSB_putc(tl->sb, '\n');
222 		/* Two tokens on same line */
223 		vcc_quoteline(tl, l1, t->src->e);
224 		vcc_markline(tl, l1, t->src->e, t->b, t2->e);
225 	} else {
226 		/* Two tokens different lines */
227 		l3 = strchr(l1, '\n');
228 		AN(l3);
229 		/* XXX: t had better be before t2 */
230 		vcc_icoord(tl->sb, t, 0);
231 		if (l3 + 1 == l2) {
232 			VSB_cat(tl->sb, " -- ");
233 			vcc_icoord(tl->sb, t2, 1);
234 		}
235 		VSB_putc(tl->sb, '\n');
236 		vcc_quoteline(tl, l1, t->src->e);
237 		vcc_markline(tl, l1, t->src->e, t->b, t2->e);
238 		if (l3 + 1 != l2) {
239 			VSB_cat(tl->sb, "[...]\n");
240 			vcc_icoord(tl->sb, t2, 1);
241 			VSB_putc(tl->sb, '\n');
242 		}
243 		vcc_quoteline(tl, l2, t->src->e);
244 		vcc_markline(tl, l2, t->src->e, t->b, t2->e);
245 	}
246 	VSB_putc(tl->sb, '\n');
247 	tl->err = 1;
248 }
249 
250 void
vcc_ErrWhere(struct vcc * tl,const struct token * t)251 vcc_ErrWhere(struct vcc *tl, const struct token *t)
252 {
253 	const char  *l1;
254 
255 	vcc_iline(t, &l1, 0);
256 	vcc_icoord(tl->sb, t, 0);
257 	VSB_putc(tl->sb, '\n');
258 	vcc_quoteline(tl, l1, t->src->e);
259 	vcc_markline(tl, l1, t->src->e, t->b, t->e);
260 	VSB_putc(tl->sb, '\n');
261 	tl->err = 1;
262 }
263 
264 /*--------------------------------------------------------------------*/
265 
266 void
vcc_NextToken(struct vcc * tl)267 vcc_NextToken(struct vcc *tl)
268 {
269 
270 	tl->t = VTAILQ_NEXT(tl->t, list);
271 	if (tl->t == NULL) {
272 		VSB_cat(tl->sb,
273 		    "Ran out of input, something is missing or"
274 		    " maybe unbalanced (...) or {...}\n");
275 		tl->err = 1;
276 		return;
277 	}
278 }
279 
280 void
vcc__Expect(struct vcc * tl,unsigned tok,unsigned line)281 vcc__Expect(struct vcc *tl, unsigned tok, unsigned line)
282 {
283 	if (tl->t->tok == tok)
284 		return;
285 	VSB_printf(tl->sb, "Expected %s got ", vcl_tnames[tok]);
286 	vcc_ErrToken(tl, tl->t);
287 	VSB_printf(tl->sb, "\n(program line %u), at\n", line);
288 	vcc_ErrWhere(tl, tl->t);
289 }
290 
291 /*--------------------------------------------------------------------
 * Compare ID token to string, return true on match
293  */
294 
295 int
vcc_IdIs(const struct token * t,const char * p)296 vcc_IdIs(const struct token *t, const char *p)
297 {
298 	const char *q;
299 
300 	assert(t->tok == ID);
301 	for (q = t->b; q < t->e && *p != '\0'; p++, q++)
302 		if (*q != *p)
303 			return (0);
304 	if (q != t->e || *p != '\0')
305 		return (0);
306 	return (1);
307 }
308 
309 /*--------------------------------------------------------------------
310  * Check that we have a Varnish identifier
311  */
312 
313 void
vcc_ExpectVid(struct vcc * tl,const char * what)314 vcc_ExpectVid(struct vcc *tl, const char *what)
315 {
316 	const char *bad = NULL;
317 	struct token *t2, *t3;
318 
319 	ExpectErr(tl, ID);
320 	ERRCHK(tl);
321 
322 	t2 = VTAILQ_NEXT(tl->t, list);
323 	while (t2->tok == '.') {
324 		bad = ".";
325 		t2 = VTAILQ_NEXT(t2, list);
326 		if (t2->tok != ID)
327 			break;
328 		t2 = VTAILQ_NEXT(t2, list);
329 	}
330 	if (bad == NULL)
331 		bad = VCT_invalid_name(tl->t->b, tl->t->e);
332 	if (bad != NULL) {
333 		VSB_printf(tl->sb, "Name of %s, '", what);
334 		for (t3 = tl->t; t3 != t2; t3 = VTAILQ_NEXT(t3, list))
335 			VSB_printf(tl->sb, "%.*s", PF(t3));
336 		VSB_printf(tl->sb,
337 		    "', contains illegal character '%c'\n", *bad);
338 		vcc_ErrWhere2(tl, tl->t, t2);
339 		return;
340 	}
341 }
342 
343 /*--------------------------------------------------------------------
344  * Decode a string
345  */
346 
347 static void
vcc_decstr(struct vcc * tl,unsigned sep)348 vcc_decstr(struct vcc *tl, unsigned sep)
349 {
350 	char *q;
351 	unsigned int l;
352 
353 	assert(tl->t->tok == CSTR);
354 	l = pdiff(tl->t->b + sep, tl->t->e - sep);
355 	tl->t->dec = TlAlloc(tl, l + 1);
356 	AN(tl->t->dec);
357 	q = tl->t->dec;
358 	memcpy(q, tl->t->b + sep, l);
359 	q[l] = '\0';
360 }
361 
362 /*--------------------------------------------------------------------
363  * Add a token to the token list.
364  */
365 
366 static void
vcc_addtoken(struct vcc * tl,unsigned tok,const struct source * sp,const char * b,const char * e)367 vcc_addtoken(struct vcc *tl, unsigned tok,
368     const struct source *sp, const char *b, const char *e)
369 {
370 	struct token *t;
371 
372 	t = TlAlloc(tl, sizeof *t);
373 	assert(t != NULL);
374 	t->tok = tok;
375 	t->b = b;
376 	t->e = e;
377 	t->src = sp;
378 	if (tl->t != NULL)
379 		VTAILQ_INSERT_AFTER(&tl->tokens, tl->t, t, list);
380 	else
381 		VTAILQ_INSERT_TAIL(&tl->tokens, t, list);
382 	tl->t = t;
383 }
384 
385 /*--------------------------------------------------------------------
386  * Find a delimited token
387  */
388 
389 static const struct delim_def {
390 	const char	*name;
391 	const char	*b;
392 	const char	*e;
393 	unsigned	len;	/* NB: must be the same for both delimiters */
394 	unsigned	crlf;
395 	unsigned	tok;
396 } delim_defs[] = {
397 #define DELIM_DEF(nm, l, r, c, t)		\
398 	{ nm, l, r, sizeof (l) - 1, c, t }
399 	DELIM_DEF("long-string", "\"\"\"", "\"\"\"", 1, CSTR),	/* """...""" */
400 	DELIM_DEF("long-string", "{\"", "\"}", 1, CSTR),	/*  {"..."}  */
401 	DELIM_DEF("string", "\"", "\"", 0, CSTR),		/*   "..."   */
402 	DELIM_DEF("inline C source", "C{", "}C", 1, CSRC),	/*  C{...}C  */
403 #undef DELIM_DEF
404 	{ NULL }
405 };
406 
407 static unsigned
vcc_delim_token(struct vcc * tl,const struct source * sp,const char * p,const char ** qp)408 vcc_delim_token(struct vcc *tl, const struct source *sp, const char *p,
409     const char **qp)
410 {
411 	const struct delim_def *dd;
412 	const char *q, *r;
413 
414 	for (dd = delim_defs; dd->name != NULL; dd++)
415 		if (!strncmp(p, dd->b, dd->len))
416 			break;
417 
418 	if (dd->name == NULL)
419 		return (0);
420 
421 	q = strstr(p + dd->len, dd->e);
422 	if (q != NULL && !dd->crlf) {
423 		r = strpbrk(p + dd->len, "\r\n");
424 		if (r != NULL && r < q)
425 			q = NULL;
426 	}
427 
428 	if (q == NULL) {
429 		vcc_addtoken(tl, EOI, sp, p, p + dd->len);
430 		VSB_printf(tl->sb, "Unterminated %s, starting at\n", dd->name);
431 		vcc_ErrWhere(tl, tl->t);
432 		return (0);
433 	}
434 
435 	assert(q < sp->e);
436 	vcc_addtoken(tl, dd->tok, sp, p, q + dd->len);
437 	if (dd->tok == CSTR)
438 		vcc_decstr(tl, dd->len);
439 	*qp = q + dd->len;
440 	return (1);
441 }
442 
443 /*--------------------------------------------------------------------
444  * Lexical analysis and token generation
445  */
446 
447 void
vcc_Lexer(struct vcc * tl,const struct source * sp,int eoi)448 vcc_Lexer(struct vcc *tl, const struct source *sp, int eoi)
449 {
450 	const char *p, *q, *r;
451 	unsigned u;
452 	struct vsb *vsb;
453 	char namebuf[40];
454 
455 	for (p = sp->b; p < sp->e; ) {
456 
457 		/* Skip any whitespace */
458 		if (vct_isspace(*p)) {
459 			p++;
460 			continue;
461 		}
462 
463 		/* Skip '#.*\n' comments */
464 		if (*p == '#') {
465 			while (p < sp->e && *p != '\n')
466 				p++;
467 			continue;
468 		}
469 
470 		/* Skip C-style comments */
471 		if (*p == '/' && p[1] == '*') {
472 			for (q = p + 2; q < sp->e; q++) {
473 				if (*q == '/' && q[1] == '*') {
474 					VSB_cat(tl->sb,
475 					    "/* ... */ comment contains /*\n");
476 					vcc_addtoken(tl, EOI, sp, p, p + 2);
477 					vcc_ErrWhere(tl, tl->t);
478 					vcc_addtoken(tl, EOI, sp, q, q + 2);
479 					vcc_ErrWhere(tl, tl->t);
480 					return;
481 				}
482 				if (*q == '*' && q[1] == '/') {
483 					p = q + 2;
484 					break;
485 				}
486 			}
487 			if (q < sp->e)
488 				continue;
489 			vcc_addtoken(tl, EOI, sp, p, p + 2);
490 			VSB_cat(tl->sb,
491 			    "Unterminated /* ... */ comment, starting at\n");
492 			vcc_ErrWhere(tl, tl->t);
493 			return;
494 		}
495 
496 		/* Skip C++-style comments */
497 		if (*p == '/' && p[1] == '/') {
498 			while (p < sp->e && *p != '\n')
499 				p++;
500 			continue;
501 		}
502 
503 		/* Recognize BLOB (= SF-binary) */
504 		if (*p == ':') {
505 			vsb = VSB_new_auto();
506 			AN(vsb);
507 			q = sp->e;
508 			q -= (q - (p + 1)) % 4;
509 			assert(q > p);
510 			r = VENC_Decode_Base64(vsb, p + 1, q);
511 			if (r == NULL) {
512 				vcc_addtoken(tl, CBLOB, sp, p, q + 1);
513 				VSB_cat(tl->sb,
514 				    "Missing colon at end of BLOB:\n");
515 				vcc_ErrWhere(tl, tl->t);
516 				VSB_destroy(&vsb);
517 				return;
518 			}
519 			vcc_addtoken(tl, CBLOB, sp, p, r + 1);
520 			if (*r == ':' && ((r - p) % 4) != 1) {
521 				VSB_cat(tl->sb,
522 				    "BLOB must have n*4 base64 characters\n");
523 				vcc_ErrWhere(tl, tl->t);
524 				VSB_destroy(&vsb);
525 				return;
526 			}
527 			if (*r == '=') {
528 				VSB_cat(tl->sb,
529 				    "Wrong padding ('=') in BLOB:\n");
530 				vcc_ErrWhere(tl, tl->t);
531 				VSB_destroy(&vsb);
532 				return;
533 			}
534 			if (*r != ':') {
535 				VSB_cat(tl->sb, "Illegal BLOB character:\n");
536 				vcc_ErrWhere(tl, tl->t);
537 				VSB_destroy(&vsb);
538 				return;
539 			}
540 			r++;
541 			AZ(VSB_finish(vsb));
542 
543 			bprintf(namebuf, "blob_%u", tl->unique++);
544 			Fh(tl, 0, "\nconst unsigned char %s_data[%zd] = {\n",
545 			    namebuf, VSB_len(vsb));
546 			for (u = 0; u < VSB_len(vsb); u++) {
547 				Fh(tl, 0, "\t0x%02x,", VSB_data(vsb)[u] & 0xff);
548 				if ((u & 7) == 7)
549 					Fh(tl, 0, "\n");
550 			}
551 			if ((u & 7) != 7)
552 				Fh(tl, 0, "\n");
553 			Fh(tl, 0, "};\n");
554 			Fh(tl, 0, "\nconst struct vrt_blob %s[1] = {{\n",
555 			    namebuf);
556 			Fh(tl, 0, "\t.len =\t%zd,\n", VSB_len(vsb));
557 			Fh(tl, 0, "\t.blob =\t%s_data,\n", namebuf);
558 			Fh(tl, 0, "}};\n");
559 			REPLACE(tl->t->dec, namebuf);
560 			VSB_destroy(&vsb);
561 			p = r;
562 			continue;
563 		}
564 
565 		/* Match delimited tokens */
566 		if (vcc_delim_token(tl, sp, p, &q) != 0) {
567 			p = q;
568 			continue;
569 		}
570 		ERRCHK(tl);
571 
572 		/* Match for the fixed tokens (see generate.py) */
573 		u = vcl_fixed_token(p, &q);
574 		if (u != 0) {
575 			vcc_addtoken(tl, u, sp, p, q);
576 			p = q;
577 			continue;
578 		}
579 
580 		/* Match Identifiers */
581 		if (vct_isident1(*p)) {
582 			for (q = p; q < sp->e; q++)
583 				if (!vct_isident(*q))
584 					break;
585 			vcc_addtoken(tl, ID, sp, p, q);
586 			p = q;
587 			continue;
588 		}
589 
590 		/* Match numbers { [0-9]+ } */
591 		if (vct_isdigit(*p)) {
592 			for (q = p; q < sp->e; q++)
593 				if (!vct_isdigit(*q))
594 					break;
595 			if (*q != '.') {
596 				vcc_addtoken(tl, CNUM, sp, p, q);
597 				p = q;
598 				continue;
599 			}
600 			for (++q; q < sp->e; q++)
601 				if (!vct_isdigit(*q))
602 					break;
603 			vcc_addtoken(tl, FNUM, sp, p, q);
604 			p = q;
605 			continue;
606 		}
607 		vcc_addtoken(tl, EOI, sp, p, p + 1);
608 		VSB_cat(tl->sb, "Syntax error at\n");
609 		vcc_ErrWhere(tl, tl->t);
610 		return;
611 	}
612 	if (eoi)
613 		vcc_addtoken(tl, EOI, sp, sp->e, sp->e);
614 }
615