1 /*-
2 * Copyright (c) 2006 Verdens Gang AS
3 * Copyright (c) 2006-2011 Varnish Software AS
4 * All rights reserved.
5 *
6 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7 *
8 * SPDX-License-Identifier: BSD-2-Clause
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include "config.h"
33
34 #include <stdlib.h>
35 #include <string.h>
36
37 #include "vcc_compile.h"
38
39 #include "venc.h"
40 #include "vct.h"
41 #include "vsb.h"
42
43 /*--------------------------------------------------------------------*/
44
45 void
vcc_ErrToken(const struct vcc * tl,const struct token * t)46 vcc_ErrToken(const struct vcc *tl, const struct token *t)
47 {
48
49 if (t->tok == EOI)
50 VSB_cat(tl->sb, "end of input");
51 else if (t->tok == CSRC)
52 VSB_cat(tl->sb, "C{ ... }C");
53 else
54 VSB_printf(tl->sb, "'%.*s'", PF(t));
55 }
56
57 void
vcc__ErrInternal(struct vcc * tl,const char * func,unsigned line)58 vcc__ErrInternal(struct vcc *tl, const char *func, unsigned line)
59 {
60
61 VSB_printf(tl->sb, "VCL compiler internal error at %s():%u\n",
62 func, line);
63 tl->err = 1;
64 }
65
66 /*--------------------------------------------------------------------
67 * Find start of source-line of token
68 */
69
70 static void
vcc_iline(const struct token * t,const char ** ll,int tail)71 vcc_iline(const struct token *t, const char **ll, int tail)
72 {
73 const char *p, *b, *x;
74
75 b = t->src->b;
76 if (ll != NULL)
77 *ll = b;
78 x = tail ? t->e - 1 : t->b;
79 for (p = b; p < x; p++) {
80 if (*p == '\n') {
81 if (ll != NULL)
82 *ll = p + 1;
83 }
84 }
85 }
86
87 /*--------------------------------------------------------------------
88 * Find and print src+line+pos of this token
89 */
90
91 static void
vcc_icoord(struct vsb * vsb,const struct token * t,int tail)92 vcc_icoord(struct vsb *vsb, const struct token *t, int tail)
93 {
94 unsigned lin, pos;
95 const char *p, *b, *x;
96
97 lin = 1;
98 pos = 0;
99 b = t->src->b;
100 x = tail ? t->e - 1 : t->b;
101 for (p = b; p < x; p++) {
102 if (*p == '\n') {
103 lin++;
104 pos = 0;
105 } else if (*p == '\t') {
106 pos &= ~7;
107 pos += 8;
108 } else
109 pos++;
110 }
111 VSB_cat(vsb, "(");
112 if (tail < 2)
113 VSB_printf(vsb, "'%s' Line %u ", t->src->name, lin);
114 VSB_printf(vsb, "Pos %u)", pos + 1);
115 }
116
117 /*--------------------------------------------------------------------*/
118
119 void
vcc_Coord(const struct vcc * tl,struct vsb * vsb,const struct token * t)120 vcc_Coord(const struct vcc *tl, struct vsb *vsb, const struct token *t)
121 {
122
123 if (t == NULL)
124 t = tl->t;
125 vcc_icoord(vsb, t, 0);
126 }
127
128 /*--------------------------------------------------------------------
129 * Output one line of source code, starting at 'l' and ending at the
130 * first NL or 'le'.
131 */
132
133 static void
vcc_quoteline(const struct vcc * tl,const char * l,const char * le)134 vcc_quoteline(const struct vcc *tl, const char *l, const char *le)
135 {
136 const char *p;
137 unsigned x, y;
138
139 x = y = 0;
140 for (p = l; p < le && *p != '\n'; p++) {
141 if (*p == '\t') {
142 y &= ~7;
143 y += 8;
144 while (x < y) {
145 VSB_putc(tl->sb, ' ');
146 x++;
147 }
148 } else {
149 x++;
150 y++;
151 VSB_putc(tl->sb, *p);
152 }
153 }
154 VSB_putc(tl->sb, '\n');
155 }
156
157 /*--------------------------------------------------------------------
158 * Output a marker line for a sourceline starting at 'l' and ending at
159 * the first NL or 'le'. Characters between 'b' and 'e' are marked.
160 */
161
162 static void
vcc_markline(const struct vcc * tl,const char * l,const char * le,const char * b,const char * e)163 vcc_markline(const struct vcc *tl, const char *l, const char *le,
164 const char *b, const char *e)
165 {
166 const char *p;
167 unsigned x, y;
168 char c;
169
170 x = y = 0;
171 for (p = l; p < le && *p != '\n'; p++) {
172 if (p >= b && p < e)
173 c = '#';
174 else
175 c = '-';
176
177 if (*p == '\t') {
178 y &= ~7;
179 y += 8;
180 } else
181 y++;
182 while (x < y) {
183 VSB_putc(tl->sb, c);
184 x++;
185 }
186 }
187 VSB_putc(tl->sb, '\n');
188 }
189
190 void
vcc_Warn(struct vcc * tl)191 vcc_Warn(struct vcc *tl)
192 {
193
194 AN(tl);
195 AN(tl->err);
196 VSB_cat(tl->sb, "(That was just a warning)\n");
197 tl->err = 0;
198 }
199
200 /*--------------------------------------------------------------------*/
201 /* XXX: should take first+last token */
202
203 void
vcc_ErrWhere2(struct vcc * tl,const struct token * t,const struct token * t2)204 vcc_ErrWhere2(struct vcc *tl, const struct token *t, const struct token *t2)
205 {
206 const char *l1, *l2, *l3;
207
208 if (t == NULL) {
209 vcc_ErrWhere(tl, t2);
210 return;
211 }
212 vcc_iline(t, &l1, 0);
213 t2 = VTAILQ_PREV(t2, tokenhead, list);
214 vcc_iline(t2, &l2, 1);
215
216
217 if (l1 == l2) {
218 vcc_icoord(tl->sb, t, 0);
219 VSB_cat(tl->sb, " -- ");
220 vcc_icoord(tl->sb, t2, 2);
221 VSB_putc(tl->sb, '\n');
222 /* Two tokens on same line */
223 vcc_quoteline(tl, l1, t->src->e);
224 vcc_markline(tl, l1, t->src->e, t->b, t2->e);
225 } else {
226 /* Two tokens different lines */
227 l3 = strchr(l1, '\n');
228 AN(l3);
229 /* XXX: t had better be before t2 */
230 vcc_icoord(tl->sb, t, 0);
231 if (l3 + 1 == l2) {
232 VSB_cat(tl->sb, " -- ");
233 vcc_icoord(tl->sb, t2, 1);
234 }
235 VSB_putc(tl->sb, '\n');
236 vcc_quoteline(tl, l1, t->src->e);
237 vcc_markline(tl, l1, t->src->e, t->b, t2->e);
238 if (l3 + 1 != l2) {
239 VSB_cat(tl->sb, "[...]\n");
240 vcc_icoord(tl->sb, t2, 1);
241 VSB_putc(tl->sb, '\n');
242 }
243 vcc_quoteline(tl, l2, t->src->e);
244 vcc_markline(tl, l2, t->src->e, t->b, t2->e);
245 }
246 VSB_putc(tl->sb, '\n');
247 tl->err = 1;
248 }
249
250 void
vcc_ErrWhere(struct vcc * tl,const struct token * t)251 vcc_ErrWhere(struct vcc *tl, const struct token *t)
252 {
253 const char *l1;
254
255 vcc_iline(t, &l1, 0);
256 vcc_icoord(tl->sb, t, 0);
257 VSB_putc(tl->sb, '\n');
258 vcc_quoteline(tl, l1, t->src->e);
259 vcc_markline(tl, l1, t->src->e, t->b, t->e);
260 VSB_putc(tl->sb, '\n');
261 tl->err = 1;
262 }
263
264 /*--------------------------------------------------------------------*/
265
266 void
vcc_NextToken(struct vcc * tl)267 vcc_NextToken(struct vcc *tl)
268 {
269
270 tl->t = VTAILQ_NEXT(tl->t, list);
271 if (tl->t == NULL) {
272 VSB_cat(tl->sb,
273 "Ran out of input, something is missing or"
274 " maybe unbalanced (...) or {...}\n");
275 tl->err = 1;
276 return;
277 }
278 }
279
280 void
vcc__Expect(struct vcc * tl,unsigned tok,unsigned line)281 vcc__Expect(struct vcc *tl, unsigned tok, unsigned line)
282 {
283 if (tl->t->tok == tok)
284 return;
285 VSB_printf(tl->sb, "Expected %s got ", vcl_tnames[tok]);
286 vcc_ErrToken(tl, tl->t);
287 VSB_printf(tl->sb, "\n(program line %u), at\n", line);
288 vcc_ErrWhere(tl, tl->t);
289 }
290
291 /*--------------------------------------------------------------------
292 * Compare ID token to string, return true of match
293 */
294
295 int
vcc_IdIs(const struct token * t,const char * p)296 vcc_IdIs(const struct token *t, const char *p)
297 {
298 const char *q;
299
300 assert(t->tok == ID);
301 for (q = t->b; q < t->e && *p != '\0'; p++, q++)
302 if (*q != *p)
303 return (0);
304 if (q != t->e || *p != '\0')
305 return (0);
306 return (1);
307 }
308
309 /*--------------------------------------------------------------------
310 * Check that we have a Varnish identifier
311 */
312
313 void
vcc_ExpectVid(struct vcc * tl,const char * what)314 vcc_ExpectVid(struct vcc *tl, const char *what)
315 {
316 const char *bad = NULL;
317 struct token *t2, *t3;
318
319 ExpectErr(tl, ID);
320 ERRCHK(tl);
321
322 t2 = VTAILQ_NEXT(tl->t, list);
323 while (t2->tok == '.') {
324 bad = ".";
325 t2 = VTAILQ_NEXT(t2, list);
326 if (t2->tok != ID)
327 break;
328 t2 = VTAILQ_NEXT(t2, list);
329 }
330 if (bad == NULL)
331 bad = VCT_invalid_name(tl->t->b, tl->t->e);
332 if (bad != NULL) {
333 VSB_printf(tl->sb, "Name of %s, '", what);
334 for (t3 = tl->t; t3 != t2; t3 = VTAILQ_NEXT(t3, list))
335 VSB_printf(tl->sb, "%.*s", PF(t3));
336 VSB_printf(tl->sb,
337 "', contains illegal character '%c'\n", *bad);
338 vcc_ErrWhere2(tl, tl->t, t2);
339 return;
340 }
341 }
342
343 /*--------------------------------------------------------------------
344 * Decode a string
345 */
346
347 static void
vcc_decstr(struct vcc * tl,unsigned sep)348 vcc_decstr(struct vcc *tl, unsigned sep)
349 {
350 char *q;
351 unsigned int l;
352
353 assert(tl->t->tok == CSTR);
354 l = pdiff(tl->t->b + sep, tl->t->e - sep);
355 tl->t->dec = TlAlloc(tl, l + 1);
356 AN(tl->t->dec);
357 q = tl->t->dec;
358 memcpy(q, tl->t->b + sep, l);
359 q[l] = '\0';
360 }
361
362 /*--------------------------------------------------------------------
363 * Add a token to the token list.
364 */
365
366 static void
vcc_addtoken(struct vcc * tl,unsigned tok,const struct source * sp,const char * b,const char * e)367 vcc_addtoken(struct vcc *tl, unsigned tok,
368 const struct source *sp, const char *b, const char *e)
369 {
370 struct token *t;
371
372 t = TlAlloc(tl, sizeof *t);
373 assert(t != NULL);
374 t->tok = tok;
375 t->b = b;
376 t->e = e;
377 t->src = sp;
378 if (tl->t != NULL)
379 VTAILQ_INSERT_AFTER(&tl->tokens, tl->t, t, list);
380 else
381 VTAILQ_INSERT_TAIL(&tl->tokens, t, list);
382 tl->t = t;
383 }
384
385 /*--------------------------------------------------------------------
386 * Find a delimited token
387 */
388
389 static const struct delim_def {
390 const char *name;
391 const char *b;
392 const char *e;
393 unsigned len; /* NB: must be the same for both delimiters */
394 unsigned crlf;
395 unsigned tok;
396 } delim_defs[] = {
397 #define DELIM_DEF(nm, l, r, c, t) \
398 { nm, l, r, sizeof (l) - 1, c, t }
399 DELIM_DEF("long-string", "\"\"\"", "\"\"\"", 1, CSTR), /* """...""" */
400 DELIM_DEF("long-string", "{\"", "\"}", 1, CSTR), /* {"..."} */
401 DELIM_DEF("string", "\"", "\"", 0, CSTR), /* "..." */
402 DELIM_DEF("inline C source", "C{", "}C", 1, CSRC), /* C{...}C */
403 #undef DELIM_DEF
404 { NULL }
405 };
406
407 static unsigned
vcc_delim_token(struct vcc * tl,const struct source * sp,const char * p,const char ** qp)408 vcc_delim_token(struct vcc *tl, const struct source *sp, const char *p,
409 const char **qp)
410 {
411 const struct delim_def *dd;
412 const char *q, *r;
413
414 for (dd = delim_defs; dd->name != NULL; dd++)
415 if (!strncmp(p, dd->b, dd->len))
416 break;
417
418 if (dd->name == NULL)
419 return (0);
420
421 q = strstr(p + dd->len, dd->e);
422 if (q != NULL && !dd->crlf) {
423 r = strpbrk(p + dd->len, "\r\n");
424 if (r != NULL && r < q)
425 q = NULL;
426 }
427
428 if (q == NULL) {
429 vcc_addtoken(tl, EOI, sp, p, p + dd->len);
430 VSB_printf(tl->sb, "Unterminated %s, starting at\n", dd->name);
431 vcc_ErrWhere(tl, tl->t);
432 return (0);
433 }
434
435 assert(q < sp->e);
436 vcc_addtoken(tl, dd->tok, sp, p, q + dd->len);
437 if (dd->tok == CSTR)
438 vcc_decstr(tl, dd->len);
439 *qp = q + dd->len;
440 return (1);
441 }
442
443 /*--------------------------------------------------------------------
444 * Lexical analysis and token generation
445 */
446
447 void
vcc_Lexer(struct vcc * tl,const struct source * sp,int eoi)448 vcc_Lexer(struct vcc *tl, const struct source *sp, int eoi)
449 {
450 const char *p, *q, *r;
451 unsigned u;
452 struct vsb *vsb;
453 char namebuf[40];
454
455 for (p = sp->b; p < sp->e; ) {
456
457 /* Skip any whitespace */
458 if (vct_isspace(*p)) {
459 p++;
460 continue;
461 }
462
463 /* Skip '#.*\n' comments */
464 if (*p == '#') {
465 while (p < sp->e && *p != '\n')
466 p++;
467 continue;
468 }
469
470 /* Skip C-style comments */
471 if (*p == '/' && p[1] == '*') {
472 for (q = p + 2; q < sp->e; q++) {
473 if (*q == '/' && q[1] == '*') {
474 VSB_cat(tl->sb,
475 "/* ... */ comment contains /*\n");
476 vcc_addtoken(tl, EOI, sp, p, p + 2);
477 vcc_ErrWhere(tl, tl->t);
478 vcc_addtoken(tl, EOI, sp, q, q + 2);
479 vcc_ErrWhere(tl, tl->t);
480 return;
481 }
482 if (*q == '*' && q[1] == '/') {
483 p = q + 2;
484 break;
485 }
486 }
487 if (q < sp->e)
488 continue;
489 vcc_addtoken(tl, EOI, sp, p, p + 2);
490 VSB_cat(tl->sb,
491 "Unterminated /* ... */ comment, starting at\n");
492 vcc_ErrWhere(tl, tl->t);
493 return;
494 }
495
496 /* Skip C++-style comments */
497 if (*p == '/' && p[1] == '/') {
498 while (p < sp->e && *p != '\n')
499 p++;
500 continue;
501 }
502
503 /* Recognize BLOB (= SF-binary) */
504 if (*p == ':') {
505 vsb = VSB_new_auto();
506 AN(vsb);
507 q = sp->e;
508 q -= (q - (p + 1)) % 4;
509 assert(q > p);
510 r = VENC_Decode_Base64(vsb, p + 1, q);
511 if (r == NULL) {
512 vcc_addtoken(tl, CBLOB, sp, p, q + 1);
513 VSB_cat(tl->sb,
514 "Missing colon at end of BLOB:\n");
515 vcc_ErrWhere(tl, tl->t);
516 VSB_destroy(&vsb);
517 return;
518 }
519 vcc_addtoken(tl, CBLOB, sp, p, r + 1);
520 if (*r == ':' && ((r - p) % 4) != 1) {
521 VSB_cat(tl->sb,
522 "BLOB must have n*4 base64 characters\n");
523 vcc_ErrWhere(tl, tl->t);
524 VSB_destroy(&vsb);
525 return;
526 }
527 if (*r == '=') {
528 VSB_cat(tl->sb,
529 "Wrong padding ('=') in BLOB:\n");
530 vcc_ErrWhere(tl, tl->t);
531 VSB_destroy(&vsb);
532 return;
533 }
534 if (*r != ':') {
535 VSB_cat(tl->sb, "Illegal BLOB character:\n");
536 vcc_ErrWhere(tl, tl->t);
537 VSB_destroy(&vsb);
538 return;
539 }
540 r++;
541 AZ(VSB_finish(vsb));
542
543 bprintf(namebuf, "blob_%u", tl->unique++);
544 Fh(tl, 0, "\nconst unsigned char %s_data[%zd] = {\n",
545 namebuf, VSB_len(vsb));
546 for (u = 0; u < VSB_len(vsb); u++) {
547 Fh(tl, 0, "\t0x%02x,", VSB_data(vsb)[u] & 0xff);
548 if ((u & 7) == 7)
549 Fh(tl, 0, "\n");
550 }
551 if ((u & 7) != 7)
552 Fh(tl, 0, "\n");
553 Fh(tl, 0, "};\n");
554 Fh(tl, 0, "\nconst struct vrt_blob %s[1] = {{\n",
555 namebuf);
556 Fh(tl, 0, "\t.len =\t%zd,\n", VSB_len(vsb));
557 Fh(tl, 0, "\t.blob =\t%s_data,\n", namebuf);
558 Fh(tl, 0, "}};\n");
559 REPLACE(tl->t->dec, namebuf);
560 VSB_destroy(&vsb);
561 p = r;
562 continue;
563 }
564
565 /* Match delimited tokens */
566 if (vcc_delim_token(tl, sp, p, &q) != 0) {
567 p = q;
568 continue;
569 }
570 ERRCHK(tl);
571
572 /* Match for the fixed tokens (see generate.py) */
573 u = vcl_fixed_token(p, &q);
574 if (u != 0) {
575 vcc_addtoken(tl, u, sp, p, q);
576 p = q;
577 continue;
578 }
579
580 /* Match Identifiers */
581 if (vct_isident1(*p)) {
582 for (q = p; q < sp->e; q++)
583 if (!vct_isident(*q))
584 break;
585 vcc_addtoken(tl, ID, sp, p, q);
586 p = q;
587 continue;
588 }
589
590 /* Match numbers { [0-9]+ } */
591 if (vct_isdigit(*p)) {
592 for (q = p; q < sp->e; q++)
593 if (!vct_isdigit(*q))
594 break;
595 if (*q != '.') {
596 vcc_addtoken(tl, CNUM, sp, p, q);
597 p = q;
598 continue;
599 }
600 for (++q; q < sp->e; q++)
601 if (!vct_isdigit(*q))
602 break;
603 vcc_addtoken(tl, FNUM, sp, p, q);
604 p = q;
605 continue;
606 }
607 vcc_addtoken(tl, EOI, sp, p, p + 1);
608 VSB_cat(tl->sb, "Syntax error at\n");
609 vcc_ErrWhere(tl, tl->t);
610 return;
611 }
612 if (eoi)
613 vcc_addtoken(tl, EOI, sp, sp->e, sp->e);
614 }
615