1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2021 Tobias Kortkamp <tobik@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28 #include "config.h"
29
30 #include <sys/param.h>
31 #include <inttypes.h>
32 #include <stdbool.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <string.h>
36
37 #include "array.h"
38 #include "flow.h"
39 #include "mem.h"
40 #include "mempool.h"
41 #include "peg.h"
42 #include "queue.h"
43 #include "set.h"
44 #include "stack.h"
45 #include "str.h"
46 #include "utf8.h"
47
48 struct PEGError {
49 size_t index;
50 size_t pos;
51 const char *rule;
52 const char *msg;
53 };
54
55 struct PEG {
56 const char *const buf;
57 const size_t len;
58 size_t pos;
59 size_t depth;
60
61 struct {
62 struct Mempool *pool;
63 struct Set *set;
64 struct Queue *queue;
65 struct Stack *pos;
66 bool enabled;
67 } captures;
68
69 bool debug;
70 struct Array *errors;
71 size_t error_index;
72 struct Queue *rule_trace;
73
74 struct Mempool *pool;
75 };
76
77 // Prototypes
78 DECLARE_COMPARE(compare_capture);
79 DECLARE_COMPARE(compare_error);
80 static bool peg_match_thruto(struct PEG *, const char *, const char *, bool);
81 static void peg_line_col_at_pos(struct PEG *, size_t, size_t *, size_t *);
82 static void peg_reset(struct PEG *);
83
84 // Constants
85 static const size_t PEG_MAX_DEPTH = 10000;
86 static const size_t PEG_MAX_ERRORS = 4;
87
88 #define MATCHER_INIT() \
89 size_t MATCHER_INIT_captures_queue_len = queue_len(peg->captures.queue); \
90 size_t MATCHER_INIT_rule_trace_len = queue_len(peg->rule_trace); \
91 do { \
92 peg->depth++; \
93 if (peg->depth > PEG_MAX_DEPTH || peg->pos > peg->len) { \
94 MATCHER_RETURN(0); \
95 } \
96 if (peg->debug) { \
97 queue_push(peg->rule_trace, (char *)rule); \
98 } \
99 } while (0)
100 #define MATCHER_POP(x) \
101 do { \
102 for (size_t i = MATCHER_INIT_captures_queue_len; i < queue_len(peg->captures.queue); i++) { \
103 struct PEGCapture *capture = queue_dequeue(peg->captures.queue); \
104 set_remove(peg->captures.set, capture); \
105 mempool_release(peg->captures.pool, capture); \
106 } \
107 if (peg->debug) { \
108 for (size_t i = MATCHER_INIT_rule_trace_len; i < queue_len(peg->rule_trace); i++) { \
109 queue_dequeue(peg->rule_trace); \
110 } \
111 } \
112 } while(0)
113 #define MATCHER_RETURN(x) \
114 do { \
115 peg->depth--; \
116 if (!(x)) { \
117 MATCHER_POP(); \
118 } \
119 return (x); \
120 } while (0)
121
DEFINE_COMPARE(compare_capture,struct PEGCapture,void)122 DEFINE_COMPARE(compare_capture, struct PEGCapture, void)
123 {
124 if (a->pos < b->pos) {
125 return -1;
126 } else if (a->pos > b->pos) {
127 return 1;
128 } else if (a->len < b->len) {
129 return -1;
130 } else if (a->len > b->len) {
131 return 1;
132 } else if (a->state < b->state) {
133 return -1;
134 } else if (a->state > b->state) {
135 return 1;
136 } else if (a->tag < b->tag) {
137 return -1;
138 } else if (a->tag > b->tag) {
139 return 1;
140 } else {
141 return 0;
142 }
143 }
144
DEFINE_COMPARE(compare_error,struct PEGError,void)145 DEFINE_COMPARE(compare_error, struct PEGError, void)
146 {
147 if (a->pos < b->pos) {
148 return 1;
149 } else if (a->pos > b->pos) {
150 return -1;
151 } else if (this != NULL && a->index < b->index) {
152 return -1;
153 } else if (this != NULL && a->index > b->index) {
154 return 1;
155 } else if (a->rule == NULL && b->rule == NULL) {
156 return 0;
157 } else if (a->rule == NULL) {
158 return 1;
159 } else if (b->rule == NULL) {
160 return -1;
161 } else {
162 return strcmp(a->rule, b->rule);
163 }
164 }
165
166 bool
peg_match(struct PEG * peg,RuleFn rulefn,CaptureFn capture_machine,void * userdata)167 peg_match(struct PEG *peg, RuleFn rulefn, CaptureFn capture_machine, void *userdata)
168 {
169 peg_reset(peg);
170
171 peg->captures.enabled = capture_machine != NULL;
172 bool result = peg_match_rule(peg, ":main", rulefn);
173
174 if (capture_machine) {
175 bool stop = false;
176 struct PEGCapture *capture;
177 while (!stop && (capture = queue_pop(peg->captures.queue))) {
178 switch (capture_machine(capture, userdata)) {
179 case PEG_CAPTURE_CONTINUE:
180 break;
181 case PEG_CAPTURE_STOP:
182 stop = true;
183 break;
184 }
185 }
186 if (stop) {
187 result = false;
188 } else {
189 struct PEGCapture capture = {
190 .peg = peg,
191 .buf = peg->buf,
192 .pos = 0,
193 .len = peg->pos,
194 .tag = -1,
195 .state = 0, // Accept state
196 };
197 capture_machine(&capture, userdata);
198 }
199 }
200
201 peg_reset(peg);
202
203 if (peg->debug && queue_len(peg->rule_trace) > 0) {
204 char *rule = queue_pop(peg->rule_trace);
205 printf("%s", rule);
206 while ((rule = queue_pop(peg->rule_trace))) {
207 printf(" -> %s", rule);
208 }
209 printf(" -> end@%zu\n", peg->pos);
210 }
211
212 return result;
213 }
214
215 bool
peg_match_atleast(struct PEG * peg,const char * rule,RuleFn rulefn,uint32_t n)216 peg_match_atleast(struct PEG *peg, const char *rule, RuleFn rulefn, uint32_t n)
217 {
218 MATCHER_INIT();
219 size_t pos = peg->pos;
220 uint32_t i;
221 for (i = 0; ; i++) {
222 if (!rulefn(peg)) {
223 break;
224 }
225 }
226 if (i < n) {
227 peg->pos = pos;
228 MATCHER_RETURN(false);
229 }
230 MATCHER_RETURN(true);
231 }
232
233 bool
peg_match_between(struct PEG * peg,const char * rule,RuleFn rulefn,uint32_t a,uint32_t b)234 peg_match_between(struct PEG *peg, const char *rule, RuleFn rulefn, uint32_t a, uint32_t b)
235 {
236 MATCHER_INIT();
237 size_t pos = peg->pos;
238 uint32_t i;
239 for (i = 0; i <= b; i++) {
240 if (!rulefn(peg)) {
241 break;
242 }
243 }
244 if (i >= a && i <= b) {
245 MATCHER_RETURN(true);
246 } else {
247 peg->pos = pos;
248 MATCHER_RETURN(false);
249 }
250 }
251
252 bool
peg_match_capture_start(struct PEG * peg)253 peg_match_capture_start(struct PEG *peg)
254 {
255 if (peg->captures.enabled) {
256 stack_push(peg->captures.pos, (void *)(uintptr_t)peg->pos);
257 }
258 return true;
259 }
260
261 bool
peg_match_capture_end(struct PEG * peg,uint32_t tag,uint32_t state,bool retval)262 peg_match_capture_end(struct PEG *peg, uint32_t tag, uint32_t state, bool retval)
263 {
264 if (peg->captures.enabled && stack_len(peg->captures.pos) > 0) {
265 size_t start = (size_t)stack_pop(peg->captures.pos);
266 if (retval) {
267 size_t len = peg->pos - start;
268 struct PEGCapture c = { .tag = tag, .state = state, .pos = start, .len = len };
269 if (!set_get(peg->captures.set, &c)) {
270 struct PEGCapture *capture = mempool_alloc(peg->captures.pool, sizeof(struct PEGCapture));
271 capture->tag = tag;
272 capture->state = state;
273 capture->buf = peg->buf + start;
274 capture->pos = start;
275 capture->len = len;
276 capture->peg = peg;
277 queue_push(peg->captures.queue, capture);
278 set_add(peg->captures.set, capture);
279 }
280 }
281 }
282 return retval;
283 }
284
285 bool
peg_match_char_f(struct PEG * peg,const char * rule,int (* f)(int))286 peg_match_char_f(struct PEG *peg, const char *rule, int (*f)(int))
287 {
288 MATCHER_INIT();
289 if (peg->pos >= peg->len) {
290 MATCHER_RETURN(false);
291 }
292 if (f((unsigned char)peg->buf[peg->pos])) {
293 peg->pos++;
294 MATCHER_RETURN(true);
295 } else {
296 MATCHER_RETURN(false);
297 }
298 }
299
300 bool
peg_match_chars(struct PEG * peg,const char * rule,uint32_t chars[],size_t len)301 peg_match_chars(struct PEG *peg, const char *rule, uint32_t chars[], size_t len)
302 {
303 MATCHER_INIT();
304 for (size_t i = 0; i < len; i++) {
305 char needle[UTF_SIZE + 1];
306 if (!utf8_encode(chars[i], needle)) {
307 MATCHER_RETURN(false);
308 }
309 size_t len = strlen(needle);
310 if ((peg->len - peg->pos) >= len &&
311 strncmp(peg->buf + peg->pos, needle, len) == 0) {
312 peg->pos += len;
313 MATCHER_RETURN(true);
314 }
315 }
316 MATCHER_RETURN(false);
317 }
318
319 bool
peg_match_eos(struct PEG * peg,const char * rule)320 peg_match_eos(struct PEG *peg, const char *rule)
321 {
322 MATCHER_INIT();
323 MATCHER_RETURN(peg->pos == peg->len);
324 }
325
326 bool
peg_match_error(struct PEG * peg,const char * rule,const char * msg)327 peg_match_error(struct PEG *peg, const char *rule, const char *msg)
328 {
329 struct PEGError key = { .rule = rule, .msg = msg, .pos = peg->pos };
330 if (array_find(peg->errors, &key, compare_error, &key) == -1) {
331 struct PEGError *err = array_get(peg->errors, PEG_MAX_ERRORS - 1);
332 err->index = peg->error_index++;
333 err->rule = rule;
334 err->msg = msg;
335 err->pos = peg->pos;
336 array_sort(peg->errors, compare_error, NULL);
337 }
338 return false;
339 }
340
341 bool
peg_match_lookahead(struct PEG * peg,const char * rule,RuleFn rulefn)342 peg_match_lookahead(struct PEG *peg, const char *rule, RuleFn rulefn)
343 {
344 MATCHER_INIT();
345 size_t pos = peg->pos;
346 if (rulefn(peg)) {
347 peg->pos = pos;
348 MATCHER_POP();
349 return true;
350 } else {
351 peg->pos = pos;
352 MATCHER_RETURN(false);
353 }
354 }
355
356 bool
peg_match_range(struct PEG * peg,const char * rule,uint32_t a,uint32_t b)357 peg_match_range(struct PEG *peg, const char *rule, uint32_t a, uint32_t b)
358 {
359 MATCHER_INIT();
360 if (peg->pos >= peg->len) {
361 MATCHER_RETURN(false);
362 }
363
364 uint32_t c;
365 size_t clen = utf8_decode(peg->buf + peg->pos, peg->len - peg->pos, &c);
366 if (UTF_INVALID == clen || a > c || c > b) {
367 MATCHER_RETURN(false);
368 }
369 peg->pos += clen;
370 MATCHER_RETURN(true);
371 }
372
373 bool
peg_match_repeat(struct PEG * peg,const char * rule,RuleFn rulefn,uint32_t n)374 peg_match_repeat(struct PEG *peg, const char *rule, RuleFn rulefn, uint32_t n)
375 {
376 MATCHER_INIT();
377 size_t pos = peg->pos;
378 for (uint32_t i = 0; i < n; i++) {
379 if (!rulefn(peg)) {
380 peg->pos = pos;
381 MATCHER_RETURN(false);
382 }
383 }
384 MATCHER_RETURN(true);
385 }
386
387 bool
peg_match_rule(struct PEG * peg,const char * rule,RuleFn rulefn)388 peg_match_rule(struct PEG *peg, const char *rule, RuleFn rulefn)
389 {
390 MATCHER_INIT();
391 size_t pos = peg->pos;
392 if (rulefn(peg)) {
393 MATCHER_RETURN(true);
394 } else {
395 peg->pos = pos;
396 MATCHER_RETURN(false);
397 }
398 }
399
400 bool
peg_match_string(struct PEG * peg,const char * rule,const char * needle[],size_t needlelen)401 peg_match_string(struct PEG *peg, const char *rule, const char *needle[], size_t needlelen)
402 {
403 MATCHER_INIT();
404 for (size_t i = 0; i < needlelen; i++) {
405 size_t len = strlen(needle[i]);
406 if ((peg->len - peg->pos) >= len &&
407 strncmp(peg->buf + peg->pos, needle[i], len) == 0) {
408 peg->pos += len;
409 MATCHER_RETURN(true);
410 }
411 }
412 MATCHER_RETURN(false);
413 }
414
415 bool
peg_match_thruto(struct PEG * peg,const char * rule,const char * needle,bool thru)416 peg_match_thruto(struct PEG *peg, const char *rule, const char *needle, bool thru)
417 {
418 MATCHER_INIT();
419 if (strcmp(needle, "") == 0) {
420 MATCHER_RETURN(false);
421 }
422
423 char *ptr = strnstr(peg->buf + peg->pos, needle, peg->len);
424 if (ptr == NULL) {
425 MATCHER_RETURN(true);
426 }
427
428 size_t offset = 0;
429 if (thru) {
430 offset = strlen(needle);
431 }
432 size_t len = ptr - (peg->buf + peg->pos) + offset;
433 peg->pos += len;
434
435 MATCHER_RETURN(true);
436 }
437
438 bool
peg_match_thru(struct PEG * peg,const char * rule,const char * needle)439 peg_match_thru(struct PEG *peg, const char *rule, const char *needle)
440 {
441 return peg_match_thruto(peg, rule, needle, 1);
442 }
443
444 bool
peg_match_to(struct PEG * peg,const char * rule,const char * needle)445 peg_match_to(struct PEG *peg, const char *rule, const char *needle)
446 {
447 return peg_match_thruto(peg, rule, needle, 0);
448 }
449
450 void
peg_line_col_at_pos(struct PEG * peg,size_t pos,size_t * line,size_t * col)451 peg_line_col_at_pos(struct PEG *peg, size_t pos, size_t *line, size_t *col)
452 {
453 *line = 1;
454 *col = 1;
455 size_t len = MIN(pos, peg->len);
456 for (size_t i = 0; i < len;) {
457 uint32_t c;
458 size_t clen = utf8_decode(peg->buf + i, peg->len - i, &c);
459 if (UTF_INVALID == clen) {
460 *line = 1;
461 *col = i;
462 return;
463 }
464 if (c == '\n') {
465 (*line)++;
466 *col = 1;
467 } else {
468 (*col)++;
469 }
470 i += clen;
471 }
472 }
473
474 struct Array *
peg_backtrace(struct PEG * peg,struct Mempool * pool)475 peg_backtrace(struct PEG *peg, struct Mempool *pool)
476 {
477 struct Array *errors = mempool_array(pool);
478 ARRAY_FOREACH(peg->errors, struct PEGError *, err) {
479 if (err->rule == NULL) {
480 break;
481 }
482 size_t line;
483 size_t col;
484 peg_line_col_at_pos(peg, err->pos, &line, &col);
485 char *buf;
486 if (!err->msg || strcmp(err->msg, "") == 0) {
487 buf = str_printf(pool, "%zu:%zu: in %s", line, col, err->rule);
488 } else {
489 buf = str_printf(pool, "%zu:%zu: in %s: %s", line, col, err->rule, err->msg);
490 }
491 array_append(errors, buf);
492 }
493 return errors;
494 }
495
496 struct PEG *
peg_new(const char * const buf,size_t len)497 peg_new(const char *const buf, size_t len)
498 {
499 struct PEG proto = {
500 .buf = buf,
501 .len = len,
502 };
503 struct PEG *peg = xmalloc(sizeof(struct PEG));
504 memcpy(peg, &proto, sizeof(*peg));
505
506 peg->pool = mempool_new();
507 mempool_take(peg->pool, peg);
508
509 peg->errors = mempool_array(peg->pool);
510 for (size_t i = 0; i < PEG_MAX_ERRORS; i++) {
511 array_append(peg->errors, mempool_alloc(peg->pool, sizeof(struct PEGError)));
512 }
513
514 peg->debug = getenv("LIBIAS_PEG_DEBUG") != NULL;
515 peg->rule_trace = mempool_queue(peg->pool);
516
517 peg->captures.pool = mempool_pool(peg->pool);
518 peg->captures.set = mempool_set(peg->pool, compare_capture, peg);
519 peg->captures.queue = mempool_queue(peg->pool);
520 peg->captures.pos = mempool_stack(peg->pool);
521
522 return peg;
523 }
524
525 void
peg_reset(struct PEG * peg)526 peg_reset(struct PEG *peg)
527 {
528 peg->pos = 0;
529 peg->depth = 0;
530 peg->error_index = 0;
531
532 mempool_release_all(peg->captures.pool);
533 stack_truncate(peg->captures.pos);
534 queue_truncate(peg->captures.queue);
535 set_truncate(peg->captures.set);
536 queue_truncate(peg->rule_trace);
537 }
538
539 void
peg_free(struct PEG * peg)540 peg_free(struct PEG *peg)
541 {
542 if (peg == NULL) {
543 return;
544 }
545 mempool_free(peg->pool);
546 }
547