1 /*
2 * Copyright (c) 2021 Calvin Rose
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to
6 * deal in the Software without restriction, including without limitation the
7 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 * sell copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 * IN THE SOFTWARE.
21 */
22
23 #ifndef JANET_AMALG
24 #include "features.h"
25 #include <janet.h>
26 #include <string.h>
27 #include "util.h"
28 #include "vector.h"
29 #include "util.h"
30 #endif
31
32 #ifdef JANET_PEG
33
34 /*
35 * Runtime
36 */
37
38 /* Hold captured patterns and match state */
39 typedef struct {
40 const uint8_t *text_start;
41 const uint8_t *text_end;
42 const uint32_t *bytecode;
43 const Janet *constants;
44 JanetArray *captures;
45 JanetBuffer *scratch;
46 JanetBuffer *tags;
47 JanetArray *tagged_captures;
48 const Janet *extrav;
49 int32_t *linemap;
50 int32_t extrac;
51 int32_t depth;
52 int32_t linemaplen;
53 int32_t has_backref;
54 enum {
55 PEG_MODE_NORMAL,
56 PEG_MODE_ACCUMULATE
57 } mode;
58 } PegState;
59
60 /* Allow backtrack with captures. We need
61 * to save state at branches, and then reload
62 * if one branch fails and try a new branch. */
63 typedef struct {
64 int32_t cap;
65 int32_t tcap;
66 int32_t scratch;
67 } CapState;
68
69 /* Save the current capture state */
cap_save(PegState * s)70 static CapState cap_save(PegState *s) {
71 CapState cs;
72 cs.scratch = s->scratch->count;
73 cs.cap = s->captures->count;
74 cs.tcap = s->tagged_captures->count;
75 return cs;
76 }
77
78 /* Load a saved capture state in the case of failure */
cap_load(PegState * s,CapState cs)79 static void cap_load(PegState *s, CapState cs) {
80 s->scratch->count = cs.scratch;
81 s->captures->count = cs.cap;
82 s->tags->count = cs.tcap;
83 s->tagged_captures->count = cs.tcap;
84 }
85
86 /* Load a saved capture state in the case of success. Keeps
87 * tagged captures around for backref. */
cap_load_keept(PegState * s,CapState cs)88 static void cap_load_keept(PegState *s, CapState cs) {
89 s->scratch->count = cs.scratch;
90 s->captures->count = cs.cap;
91 }
92
93 /* Add a capture */
pushcap(PegState * s,Janet capture,uint32_t tag)94 static void pushcap(PegState *s, Janet capture, uint32_t tag) {
95 if (s->mode == PEG_MODE_ACCUMULATE) {
96 janet_to_string_b(s->scratch, capture);
97 }
98 if (s->mode == PEG_MODE_NORMAL) {
99 janet_array_push(s->captures, capture);
100 }
101 if (s->has_backref) {
102 janet_array_push(s->tagged_captures, capture);
103 janet_buffer_push_u8(s->tags, tag);
104 }
105 }
106
107 /* Lazily generate line map to get line and column information for PegState.
108 * line and column are 1-indexed. */
109 typedef struct {
110 int32_t line;
111 int32_t col;
112 } LineCol;
get_linecol_from_position(PegState * s,int32_t position)113 static LineCol get_linecol_from_position(PegState *s, int32_t position) {
114 /* Generate if not made yet */
115 if (s->linemaplen < 0) {
116 int32_t newline_count = 0;
117 for (const uint8_t *c = s->text_start; c < s->text_end; c++) {
118 if (*c == '\n') newline_count++;
119 }
120 int32_t *mem = janet_smalloc(sizeof(int32_t) * newline_count);
121 size_t index = 0;
122 for (const uint8_t *c = s->text_start; c < s->text_end; c++) {
123 if (*c == '\n') mem[index++] = (int32_t)(c - s->text_start);
124 }
125 s->linemaplen = newline_count;
126 s->linemap = mem;
127 }
128 /* Do binary search for line. Slightly modified from classic binary search:
129 * - if we find that our current character is a line break, just return immediately.
130 * a newline character is consider to be on the same line as the character before
131 * (\n is line terminator, not line separator).
132 * - in the not-found case, we still want to find the greatest-indexed newline that
133 * is before position. we use that to calcuate the line and column.
134 * - in the case that lo = 0 and s->linemap[0] is still greater than position, we
135 * are on the first line and our column is position + 1. */
136 int32_t hi = s->linemaplen; /* hi is greater than the actual line */
137 int32_t lo = 0; /* lo is less than or equal to the actual line */
138 LineCol ret;
139 while (lo + 1 < hi) {
140 int32_t mid = lo + (hi - lo) / 2;
141 if (s->linemap[mid] >= position) {
142 hi = mid;
143 } else {
144 lo = mid;
145 }
146 }
147 /* first line case */
148 if (s->linemaplen == 0 || (lo == 0 && s->linemap[0] >= position)) {
149 ret.line = 1;
150 ret.col = position + 1;
151 } else {
152 ret.line = lo + 2;
153 ret.col = position - s->linemap[lo];
154 }
155 return ret;
156 }
157
158 /* Convert a uint64_t to a int64_t by wrapping to a maximum number of bytes */
peg_convert_u64_s64(uint64_t from,int width)159 static int64_t peg_convert_u64_s64(uint64_t from, int width) {
160 int shift = 8 * (8 - width);
161 return ((int64_t)(from << shift)) >> shift;
162 }
163
164 /* Prevent stack overflow */
165 #define down1(s) do { \
166 if (0 == --((s)->depth)) janet_panic("peg/match recursed too deeply"); \
167 } while (0)
168 #define up1(s) ((s)->depth++)
169
170 /* Evaluate a peg rule
171 * Pre-conditions: s is in a valid state
172 * Post-conditions: If there is a match, returns a pointer to the next text.
173 * All captures on the capture stack are valid. If there is no match,
174 * returns NULL. Extra captures from successful child expressions can be
175 * left on the capture stack.
176 */
peg_rule(PegState * s,const uint32_t * rule,const uint8_t * text)177 static const uint8_t *peg_rule(
178 PegState *s,
179 const uint32_t *rule,
180 const uint8_t *text) {
181 tail:
182 switch (*rule & 0x1F) {
183 default:
184 janet_panic("unexpected opcode");
185 return NULL;
186
187 case RULE_LITERAL: {
188 uint32_t len = rule[1];
189 if (text + len > s->text_end) return NULL;
190 return memcmp(text, rule + 2, len) ? NULL : text + len;
191 }
192
193 case RULE_NCHAR: {
194 uint32_t n = rule[1];
195 return (text + n > s->text_end) ? NULL : text + n;
196 }
197
198 case RULE_NOTNCHAR: {
199 uint32_t n = rule[1];
200 return (text + n > s->text_end) ? text : NULL;
201 }
202
203 case RULE_RANGE: {
204 uint8_t lo = rule[1] & 0xFF;
205 uint8_t hi = (rule[1] >> 16) & 0xFF;
206 return (text < s->text_end &&
207 text[0] >= lo &&
208 text[0] <= hi)
209 ? text + 1
210 : NULL;
211 }
212
213 case RULE_SET: {
214 uint32_t word = rule[1 + (text[0] >> 5)];
215 uint32_t mask = (uint32_t)1 << (text[0] & 0x1F);
216 return (text < s->text_end && (word & mask))
217 ? text + 1
218 : NULL;
219 }
220
221 case RULE_LOOK: {
222 text += ((int32_t *)rule)[1];
223 if (text < s->text_start || text > s->text_end) return NULL;
224 down1(s);
225 const uint8_t *result = peg_rule(s, s->bytecode + rule[2], text);
226 up1(s);
227 text -= ((int32_t *)rule)[1];
228 return result ? text : NULL;
229 }
230
231 case RULE_CHOICE: {
232 uint32_t len = rule[1];
233 const uint32_t *args = rule + 2;
234 if (len == 0) return NULL;
235 down1(s);
236 CapState cs = cap_save(s);
237 for (uint32_t i = 0; i < len - 1; i++) {
238 const uint8_t *result = peg_rule(s, s->bytecode + args[i], text);
239 if (result) {
240 up1(s);
241 return result;
242 }
243 cap_load(s, cs);
244 }
245 up1(s);
246 rule = s->bytecode + args[len - 1];
247 goto tail;
248 }
249
250 case RULE_SEQUENCE: {
251 uint32_t len = rule[1];
252 const uint32_t *args = rule + 2;
253 if (len == 0) return text;
254 down1(s);
255 for (uint32_t i = 0; text && i < len - 1; i++)
256 text = peg_rule(s, s->bytecode + args[i], text);
257 up1(s);
258 if (!text) return NULL;
259 rule = s->bytecode + args[len - 1];
260 goto tail;
261 }
262
263 case RULE_IF:
264 case RULE_IFNOT: {
265 const uint32_t *rule_a = s->bytecode + rule[1];
266 const uint32_t *rule_b = s->bytecode + rule[2];
267 down1(s);
268 const uint8_t *result = peg_rule(s, rule_a, text);
269 up1(s);
270 if (rule[0] == RULE_IF ? !result : !!result) return NULL;
271 rule = rule_b;
272 goto tail;
273 }
274
275 case RULE_NOT: {
276 const uint32_t *rule_a = s->bytecode + rule[1];
277 down1(s);
278 const uint8_t *result = peg_rule(s, rule_a, text);
279 up1(s);
280 return (result) ? NULL : text;
281 }
282
283 case RULE_THRU:
284 case RULE_TO: {
285 const uint32_t *rule_a = s->bytecode + rule[1];
286 const uint8_t *next_text;
287 CapState cs = cap_save(s);
288 down1(s);
289 while (text <= s->text_end) {
290 CapState cs2 = cap_save(s);
291 next_text = peg_rule(s, rule_a, text);
292 if (next_text) {
293 if (rule[0] == RULE_TO) cap_load(s, cs2);
294 break;
295 }
296 text++;
297 }
298 up1(s);
299 if (text > s->text_end) {
300 cap_load(s, cs);
301 return NULL;
302 }
303 return rule[0] == RULE_TO ? text : next_text;
304 }
305
306 case RULE_BETWEEN: {
307 uint32_t lo = rule[1];
308 uint32_t hi = rule[2];
309 const uint32_t *rule_a = s->bytecode + rule[3];
310 uint32_t captured = 0;
311 const uint8_t *next_text;
312 CapState cs = cap_save(s);
313 down1(s);
314 while (captured < hi) {
315 CapState cs2 = cap_save(s);
316 next_text = peg_rule(s, rule_a, text);
317 if (!next_text || next_text == text) {
318 cap_load(s, cs2);
319 break;
320 }
321 captured++;
322 text = next_text;
323 }
324 up1(s);
325 if (captured < lo) {
326 cap_load(s, cs);
327 return NULL;
328 }
329 return text;
330 }
331
332 /* Capturing rules */
333
334 case RULE_GETTAG: {
335 uint32_t search = rule[1];
336 uint32_t tag = rule[2];
337 for (int32_t i = s->tags->count - 1; i >= 0; i--) {
338 if (s->tags->data[i] == search) {
339 pushcap(s, s->tagged_captures->data[i], tag);
340 return text;
341 }
342 }
343 return NULL;
344 }
345
346 case RULE_POSITION: {
347 pushcap(s, janet_wrap_number((double)(text - s->text_start)), rule[1]);
348 return text;
349 }
350
351 case RULE_LINE: {
352 LineCol lc = get_linecol_from_position(s, (int32_t)(text - s->text_start));
353 pushcap(s, janet_wrap_number((double)(lc.line)), rule[1]);
354 return text;
355 }
356
357 case RULE_COLUMN: {
358 LineCol lc = get_linecol_from_position(s, (int32_t)(text - s->text_start));
359 pushcap(s, janet_wrap_number((double)(lc.col)), rule[1]);
360 return text;
361 }
362
363 case RULE_ARGUMENT: {
364 int32_t index = ((int32_t *)rule)[1];
365 Janet capture = (index >= s->extrac) ? janet_wrap_nil() : s->extrav[index];
366 pushcap(s, capture, rule[2]);
367 return text;
368 }
369
370 case RULE_CONSTANT: {
371 pushcap(s, s->constants[rule[1]], rule[2]);
372 return text;
373 }
374
375 case RULE_CAPTURE: {
376 down1(s);
377 const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
378 up1(s);
379 if (!result) return NULL;
380 /* Specialized pushcap - avoid intermediate string creation */
381 if (!s->has_backref && s->mode == PEG_MODE_ACCUMULATE) {
382 janet_buffer_push_bytes(s->scratch, text, (int32_t)(result - text));
383 } else {
384 uint32_t tag = rule[2];
385 pushcap(s, janet_stringv(text, (int32_t)(result - text)), tag);
386 }
387 return result;
388 }
389
390 case RULE_CAPTURE_NUM: {
391 down1(s);
392 const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
393 up1(s);
394 if (!result) return NULL;
395 /* check number parsing */
396 double x = 0.0;
397 int32_t base = (int32_t) rule[2];
398 if (janet_scan_number_base(text, (int32_t)(result - text), base, &x)) return NULL;
399 /* Specialized pushcap - avoid intermediate string creation */
400 if (!s->has_backref && s->mode == PEG_MODE_ACCUMULATE) {
401 janet_buffer_push_bytes(s->scratch, text, (int32_t)(result - text));
402 } else {
403 uint32_t tag = rule[3];
404 pushcap(s, janet_wrap_number(x), tag);
405 }
406 return result;
407 }
408
409 case RULE_ACCUMULATE: {
410 uint32_t tag = rule[2];
411 int oldmode = s->mode;
412 if (!tag && oldmode == PEG_MODE_ACCUMULATE) {
413 rule = s->bytecode + rule[1];
414 goto tail;
415 }
416 CapState cs = cap_save(s);
417 s->mode = PEG_MODE_ACCUMULATE;
418 down1(s);
419 const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
420 up1(s);
421 s->mode = oldmode;
422 if (!result) return NULL;
423 Janet cap = janet_stringv(s->scratch->data + cs.scratch,
424 s->scratch->count - cs.scratch);
425 cap_load_keept(s, cs);
426 pushcap(s, cap, tag);
427 return result;
428 }
429
430 case RULE_DROP: {
431 CapState cs = cap_save(s);
432 down1(s);
433 const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
434 up1(s);
435 if (!result) return NULL;
436 cap_load(s, cs);
437 return result;
438 }
439
440 case RULE_GROUP: {
441 uint32_t tag = rule[2];
442 int oldmode = s->mode;
443 CapState cs = cap_save(s);
444 s->mode = PEG_MODE_NORMAL;
445 down1(s);
446 const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
447 up1(s);
448 s->mode = oldmode;
449 if (!result) return NULL;
450 int32_t num_sub_captures = s->captures->count - cs.cap;
451 JanetArray *sub_captures = janet_array(num_sub_captures);
452 safe_memcpy(sub_captures->data,
453 s->captures->data + cs.cap,
454 sizeof(Janet) * num_sub_captures);
455 sub_captures->count = num_sub_captures;
456 cap_load_keept(s, cs);
457 pushcap(s, janet_wrap_array(sub_captures), tag);
458 return result;
459 }
460
461 case RULE_REPLACE:
462 case RULE_MATCHTIME: {
463 uint32_t tag = rule[3];
464 int oldmode = s->mode;
465 CapState cs = cap_save(s);
466 s->mode = PEG_MODE_NORMAL;
467 down1(s);
468 const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
469 up1(s);
470 s->mode = oldmode;
471 if (!result) return NULL;
472
473 Janet cap = janet_wrap_nil();
474 Janet constant = s->constants[rule[2]];
475 switch (janet_type(constant)) {
476 default:
477 cap = constant;
478 break;
479 case JANET_STRUCT:
480 if (s->captures->count) {
481 cap = janet_struct_get(janet_unwrap_struct(constant),
482 s->captures->data[s->captures->count - 1]);
483 }
484 break;
485 case JANET_TABLE:
486 if (s->captures->count) {
487 cap = janet_table_get(janet_unwrap_table(constant),
488 s->captures->data[s->captures->count - 1]);
489 }
490 break;
491 case JANET_CFUNCTION:
492 cap = janet_unwrap_cfunction(constant)(s->captures->count - cs.cap,
493 s->captures->data + cs.cap);
494 break;
495 case JANET_FUNCTION:
496 cap = janet_call(janet_unwrap_function(constant),
497 s->captures->count - cs.cap,
498 s->captures->data + cs.cap);
499 break;
500 }
501 cap_load_keept(s, cs);
502 if (rule[0] == RULE_MATCHTIME && !janet_truthy(cap)) return NULL;
503 pushcap(s, cap, tag);
504 return result;
505 }
506
507 case RULE_ERROR: {
508 int oldmode = s->mode;
509 s->mode = PEG_MODE_NORMAL;
510 int32_t old_cap = s->captures->count;
511 down1(s);
512 const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
513 up1(s);
514 s->mode = oldmode;
515 if (!result) return NULL;
516 if (s->captures->count > old_cap) {
517 /* Throw last capture */
518 janet_panicv(s->captures->data[s->captures->count - 1]);
519 } else {
520 /* Throw generic error */
521 int32_t start = (int32_t)(text - s->text_start);
522 LineCol lc = get_linecol_from_position(s, start);
523 janet_panicf("match error at line %d, column %d", lc.line, lc.col);
524 }
525 return NULL;
526 }
527
528 case RULE_BACKMATCH: {
529 uint32_t search = rule[1];
530 for (int32_t i = s->tags->count - 1; i >= 0; i--) {
531 if (s->tags->data[i] == search) {
532 Janet capture = s->tagged_captures->data[i];
533 if (!janet_checktype(capture, JANET_STRING))
534 return NULL;
535 const uint8_t *bytes = janet_unwrap_string(capture);
536 int32_t len = janet_string_length(bytes);
537 if (text + len > s->text_end)
538 return NULL;
539 return memcmp(text, bytes, len) ? NULL : text + len;
540 }
541 }
542 return NULL;
543 }
544
545 case RULE_LENPREFIX: {
546 int oldmode = s->mode;
547 s->mode = PEG_MODE_NORMAL;
548 const uint8_t *next_text;
549 CapState cs = cap_save(s);
550 down1(s);
551 next_text = peg_rule(s, s->bytecode + rule[1], text);
552 up1(s);
553 if (NULL == next_text) return NULL;
554 s->mode = oldmode;
555 int32_t num_sub_captures = s->captures->count - cs.cap;
556 Janet lencap;
557 if (num_sub_captures <= 0 ||
558 (lencap = s->captures->data[cs.cap], !janet_checkint(lencap))) {
559 cap_load(s, cs);
560 return NULL;
561 }
562 int32_t nrep = janet_unwrap_integer(lencap);
563 /* drop captures from len pattern */
564 cap_load(s, cs);
565 for (int32_t i = 0; i < nrep; i++) {
566 down1(s);
567 next_text = peg_rule(s, s->bytecode + rule[2], next_text);
568 up1(s);
569 if (NULL == next_text) {
570 cap_load(s, cs);
571 return NULL;
572 }
573 }
574 return next_text;
575 }
576
577 case RULE_READINT: {
578 uint32_t tag = rule[2];
579 uint32_t signedness = rule[1] & 0x10;
580 uint32_t endianess = rule[1] & 0x20;
581 int width = (int)(rule[1] & 0xF);
582 if (text + width > s->text_end) return NULL;
583 uint64_t accum = 0;
584 if (endianess) {
585 /* BE */
586 for (int i = 0; i < width; i++) accum = (accum << 8) | text[i];
587 } else {
588 /* LE */
589 for (int i = width - 1; i >= 0; i--) accum = (accum << 8) | text[i];
590 }
591
592 Janet capture_value;
593 /* We can only parse integeres of greater than 6 bytes reliable if int-types are enabled.
594 * Otherwise, we may lose precision, so 6 is the maximum size when int-types are disabled. */
595 #ifdef JANET_INT_TYPES
596 if (width > 6) {
597 if (signedness) {
598 capture_value = janet_wrap_s64(peg_convert_u64_s64(accum, width));
599 } else {
600 capture_value = janet_wrap_u64(accum);
601 }
602 } else
603 #endif
604 {
605 double double_value;
606 if (signedness) {
607 double_value = (double)(peg_convert_u64_s64(accum, width));
608 } else {
609 double_value = (double)accum;
610 }
611 capture_value = janet_wrap_number(double_value);
612 }
613
614 pushcap(s, capture_value, tag);
615 return text + width;
616 }
617
618 case RULE_UNREF: {
619 int32_t tcap = s->tags->count;
620 down1(s);
621 const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
622 up1(s);
623 if (!result) return NULL;
624 int32_t final_tcap = s->tags->count;
625 /* Truncate tagged captures to not include items of the given tag */
626 int32_t w = tcap;
627 /* If no tag is given, drop ALL tagged captures */
628 if (rule[2]) {
629 for (int32_t i = tcap; i < final_tcap; i++) {
630 if (s->tags->data[i] != (0xFF & rule[2])) {
631 s->tags->data[w] = s->tags->data[i];
632 s->tagged_captures->data[w] = s->tagged_captures->data[i];
633 w++;
634 }
635 }
636 }
637 s->tags->count = w;
638 s->tagged_captures->count = w;
639 return result;
640 }
641
642 }
643 }
644
645 /*
646 * Compilation
647 */
648
649 typedef struct {
650 JanetTable *grammar;
651 JanetTable *default_grammar;
652 JanetTable *tags;
653 Janet *constants;
654 uint32_t *bytecode;
655 Janet form;
656 int depth;
657 uint32_t nexttag;
658 int has_backref;
659 } Builder;
660
661 /* Forward declaration to allow recursion */
662 static uint32_t peg_compile1(Builder *b, Janet peg);
663
664 /*
665 * Errors
666 */
667
builder_cleanup(Builder * b)668 static void builder_cleanup(Builder *b) {
669 janet_v_free(b->constants);
670 janet_v_free(b->bytecode);
671 }
672
peg_panic(Builder * b,const char * msg)673 JANET_NO_RETURN static void peg_panic(Builder *b, const char *msg) {
674 builder_cleanup(b);
675 janet_panicf("grammar error in %p, %s", b->form, msg);
676 }
677
678 #define peg_panicf(b,...) peg_panic((b), (const char *) janet_formatc(__VA_ARGS__))
679
peg_fixarity(Builder * b,int32_t argc,int32_t arity)680 static void peg_fixarity(Builder *b, int32_t argc, int32_t arity) {
681 if (argc != arity) {
682 peg_panicf(b, "expected %d argument%s, got %d",
683 arity,
684 arity == 1 ? "" : "s",
685 argc);
686 }
687 }
688
peg_arity(Builder * b,int32_t arity,int32_t min,int32_t max)689 static void peg_arity(Builder *b, int32_t arity, int32_t min, int32_t max) {
690 if (min >= 0 && arity < min)
691 peg_panicf(b, "arity mismatch, expected at least %d, got %d", min, arity);
692 if (max >= 0 && arity > max)
693 peg_panicf(b, "arity mismatch, expected at most %d, got %d", max, arity);
694 }
695
peg_getset(Builder * b,Janet x)696 static const uint8_t *peg_getset(Builder *b, Janet x) {
697 if (!janet_checktype(x, JANET_STRING))
698 peg_panic(b, "expected string for character set");
699 const uint8_t *str = janet_unwrap_string(x);
700 return str;
701 }
702
peg_getrange(Builder * b,Janet x)703 static const uint8_t *peg_getrange(Builder *b, Janet x) {
704 if (!janet_checktype(x, JANET_STRING))
705 peg_panic(b, "expected string for character range");
706 const uint8_t *str = janet_unwrap_string(x);
707 if (janet_string_length(str) != 2)
708 peg_panicf(b, "expected string to have length 2, got %v", x);
709 if (str[1] < str[0])
710 peg_panicf(b, "range %v is empty", x);
711 return str;
712 }
713
peg_getinteger(Builder * b,Janet x)714 static int32_t peg_getinteger(Builder *b, Janet x) {
715 if (!janet_checkint(x))
716 peg_panicf(b, "expected integer, got %v", x);
717 return janet_unwrap_integer(x);
718 }
719
peg_getnat(Builder * b,Janet x)720 static int32_t peg_getnat(Builder *b, Janet x) {
721 int32_t i = peg_getinteger(b, x);
722 if (i < 0)
723 peg_panicf(b, "expected non-negative integer, got %v", x);
724 return i;
725 }
726
727 /*
728 * Emission
729 */
730
emit_constant(Builder * b,Janet c)731 static uint32_t emit_constant(Builder *b, Janet c) {
732 uint32_t cindex = (uint32_t) janet_v_count(b->constants);
733 janet_v_push(b->constants, c);
734 return cindex;
735 }
736
emit_tag(Builder * b,Janet t)737 static uint32_t emit_tag(Builder *b, Janet t) {
738 if (!janet_checktype(t, JANET_KEYWORD))
739 peg_panicf(b, "expected keyword for capture tag, got %v", t);
740 Janet check = janet_table_get(b->tags, t);
741 if (janet_checktype(check, JANET_NIL)) {
742 uint32_t tag = b->nexttag++;
743 if (tag > 255) {
744 peg_panic(b, "too many tags - up to 255 tags are supported per peg");
745 }
746 Janet val = janet_wrap_number(tag);
747 janet_table_put(b->tags, t, val);
748 return tag;
749 } else {
750 return (uint32_t) janet_unwrap_number(check);
751 }
752 }
753
754 /* Reserve space in bytecode for a rule. When a special emits a rule,
755 * it must place that rule immediately on the bytecode stack. This lets
756 * the compiler know where the rule is going to be before it is complete,
757 * allowing recursive rules. */
758 typedef struct {
759 Builder *builder;
760 uint32_t index;
761 int32_t size;
762 } Reserve;
763
reserve(Builder * b,int32_t size)764 static Reserve reserve(Builder *b, int32_t size) {
765 Reserve r;
766 r.index = janet_v_count(b->bytecode);
767 r.builder = b;
768 r.size = size;
769 for (int32_t i = 0; i < size; i++)
770 janet_v_push(b->bytecode, 0);
771 return r;
772 }
773
774 /* Emit a rule in the builder. Returns the index of the new rule */
emit_rule(Reserve r,int32_t op,int32_t n,const uint32_t * body)775 static void emit_rule(Reserve r, int32_t op, int32_t n, const uint32_t *body) {
776 janet_assert(r.size == n + 1, "bad reserve");
777 r.builder->bytecode[r.index] = op;
778 memcpy(r.builder->bytecode + r.index + 1, body, n * sizeof(uint32_t));
779 }
780
781 /* For RULE_LITERAL */
emit_bytes(Builder * b,uint32_t op,int32_t len,const uint8_t * bytes)782 static void emit_bytes(Builder *b, uint32_t op, int32_t len, const uint8_t *bytes) {
783 uint32_t next_rule = janet_v_count(b->bytecode);
784 janet_v_push(b->bytecode, op);
785 janet_v_push(b->bytecode, len);
786 int32_t words = ((len + 3) >> 2);
787 for (int32_t i = 0; i < words; i++)
788 janet_v_push(b->bytecode, 0);
789 memcpy(b->bytecode + next_rule + 2, bytes, len);
790 }
791
792 /* For fixed arity rules of arities 1, 2, and 3 */
emit_1(Reserve r,uint32_t op,uint32_t arg)793 static void emit_1(Reserve r, uint32_t op, uint32_t arg) {
794 emit_rule(r, op, 1, &arg);
795 }
emit_2(Reserve r,uint32_t op,uint32_t arg1,uint32_t arg2)796 static void emit_2(Reserve r, uint32_t op, uint32_t arg1, uint32_t arg2) {
797 uint32_t arr[2] = {arg1, arg2};
798 emit_rule(r, op, 2, arr);
799 }
emit_3(Reserve r,uint32_t op,uint32_t arg1,uint32_t arg2,uint32_t arg3)800 static void emit_3(Reserve r, uint32_t op, uint32_t arg1, uint32_t arg2, uint32_t arg3) {
801 uint32_t arr[3] = {arg1, arg2, arg3};
802 emit_rule(r, op, 3, arr);
803 }
804
805 /*
806 * Specials
807 */
808
bitmap_set(uint32_t * bitmap,uint8_t c)809 static void bitmap_set(uint32_t *bitmap, uint8_t c) {
810 bitmap[c >> 5] |= ((uint32_t)1) << (c & 0x1F);
811 }
812
spec_range(Builder * b,int32_t argc,const Janet * argv)813 static void spec_range(Builder *b, int32_t argc, const Janet *argv) {
814 peg_arity(b, argc, 1, -1);
815 if (argc == 1) {
816 Reserve r = reserve(b, 2);
817 const uint8_t *str = peg_getrange(b, argv[0]);
818 uint32_t arg = str[0] | (str[1] << 16);
819 emit_1(r, RULE_RANGE, arg);
820 } else {
821 /* Compile as a set */
822 Reserve r = reserve(b, 9);
823 uint32_t bitmap[8] = {0};
824 for (int32_t i = 0; i < argc; i++) {
825 const uint8_t *str = peg_getrange(b, argv[i]);
826 for (uint32_t c = str[0]; c <= str[1]; c++)
827 bitmap_set(bitmap, c);
828 }
829 emit_rule(r, RULE_SET, 8, bitmap);
830 }
831 }
832
spec_set(Builder * b,int32_t argc,const Janet * argv)833 static void spec_set(Builder *b, int32_t argc, const Janet *argv) {
834 peg_fixarity(b, argc, 1);
835 Reserve r = reserve(b, 9);
836 const uint8_t *str = peg_getset(b, argv[0]);
837 uint32_t bitmap[8] = {0};
838 for (int32_t i = 0; i < janet_string_length(str); i++)
839 bitmap_set(bitmap, str[i]);
840 emit_rule(r, RULE_SET, 8, bitmap);
841 }
842
spec_look(Builder * b,int32_t argc,const Janet * argv)843 static void spec_look(Builder *b, int32_t argc, const Janet *argv) {
844 peg_arity(b, argc, 1, 2);
845 Reserve r = reserve(b, 3);
846 int32_t rulearg = argc == 2 ? 1 : 0;
847 int32_t offset = argc == 2 ? peg_getinteger(b, argv[0]) : 0;
848 uint32_t subrule = peg_compile1(b, argv[rulearg]);
849 emit_2(r, RULE_LOOK, (uint32_t) offset, subrule);
850 }
851
852 /* Rule of the form [len, rules...] */
spec_variadic(Builder * b,int32_t argc,const Janet * argv,uint32_t op)853 static void spec_variadic(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
854 uint32_t rule = janet_v_count(b->bytecode);
855 janet_v_push(b->bytecode, op);
856 janet_v_push(b->bytecode, argc);
857 for (int32_t i = 0; i < argc; i++)
858 janet_v_push(b->bytecode, 0);
859 for (int32_t i = 0; i < argc; i++) {
860 uint32_t rulei = peg_compile1(b, argv[i]);
861 b->bytecode[rule + 2 + i] = rulei;
862 }
863 }
864
spec_choice(Builder * b,int32_t argc,const Janet * argv)865 static void spec_choice(Builder *b, int32_t argc, const Janet *argv) {
866 spec_variadic(b, argc, argv, RULE_CHOICE);
867 }
spec_sequence(Builder * b,int32_t argc,const Janet * argv)868 static void spec_sequence(Builder *b, int32_t argc, const Janet *argv) {
869 spec_variadic(b, argc, argv, RULE_SEQUENCE);
870 }
871
872 /* For (if a b) and (if-not a b) */
spec_branch(Builder * b,int32_t argc,const Janet * argv,uint32_t rule)873 static void spec_branch(Builder *b, int32_t argc, const Janet *argv, uint32_t rule) {
874 peg_fixarity(b, argc, 2);
875 Reserve r = reserve(b, 3);
876 uint32_t rule_a = peg_compile1(b, argv[0]);
877 uint32_t rule_b = peg_compile1(b, argv[1]);
878 emit_2(r, rule, rule_a, rule_b);
879 }
880
spec_if(Builder * b,int32_t argc,const Janet * argv)881 static void spec_if(Builder *b, int32_t argc, const Janet *argv) {
882 spec_branch(b, argc, argv, RULE_IF);
883 }
spec_ifnot(Builder * b,int32_t argc,const Janet * argv)884 static void spec_ifnot(Builder *b, int32_t argc, const Janet *argv) {
885 spec_branch(b, argc, argv, RULE_IFNOT);
886 }
spec_lenprefix(Builder * b,int32_t argc,const Janet * argv)887 static void spec_lenprefix(Builder *b, int32_t argc, const Janet *argv) {
888 spec_branch(b, argc, argv, RULE_LENPREFIX);
889 }
890
spec_between(Builder * b,int32_t argc,const Janet * argv)891 static void spec_between(Builder *b, int32_t argc, const Janet *argv) {
892 peg_fixarity(b, argc, 3);
893 Reserve r = reserve(b, 4);
894 int32_t lo = peg_getnat(b, argv[0]);
895 int32_t hi = peg_getnat(b, argv[1]);
896 uint32_t subrule = peg_compile1(b, argv[2]);
897 emit_3(r, RULE_BETWEEN, lo, hi, subrule);
898 }
899
spec_repeater(Builder * b,int32_t argc,const Janet * argv,int32_t min)900 static void spec_repeater(Builder *b, int32_t argc, const Janet *argv, int32_t min) {
901 peg_fixarity(b, argc, 1);
902 Reserve r = reserve(b, 4);
903 uint32_t subrule = peg_compile1(b, argv[0]);
904 emit_3(r, RULE_BETWEEN, min, UINT32_MAX, subrule);
905 }
906
spec_some(Builder * b,int32_t argc,const Janet * argv)907 static void spec_some(Builder *b, int32_t argc, const Janet *argv) {
908 spec_repeater(b, argc, argv, 1);
909 }
spec_any(Builder * b,int32_t argc,const Janet * argv)910 static void spec_any(Builder *b, int32_t argc, const Janet *argv) {
911 spec_repeater(b, argc, argv, 0);
912 }
913
spec_atleast(Builder * b,int32_t argc,const Janet * argv)914 static void spec_atleast(Builder *b, int32_t argc, const Janet *argv) {
915 peg_fixarity(b, argc, 2);
916 Reserve r = reserve(b, 4);
917 int32_t n = peg_getnat(b, argv[0]);
918 uint32_t subrule = peg_compile1(b, argv[1]);
919 emit_3(r, RULE_BETWEEN, n, UINT32_MAX, subrule);
920 }
921
spec_atmost(Builder * b,int32_t argc,const Janet * argv)922 static void spec_atmost(Builder *b, int32_t argc, const Janet *argv) {
923 peg_fixarity(b, argc, 2);
924 Reserve r = reserve(b, 4);
925 int32_t n = peg_getnat(b, argv[0]);
926 uint32_t subrule = peg_compile1(b, argv[1]);
927 emit_3(r, RULE_BETWEEN, 0, n, subrule);
928 }
929
spec_opt(Builder * b,int32_t argc,const Janet * argv)930 static void spec_opt(Builder *b, int32_t argc, const Janet *argv) {
931 peg_fixarity(b, argc, 1);
932 Reserve r = reserve(b, 4);
933 uint32_t subrule = peg_compile1(b, argv[0]);
934 emit_3(r, RULE_BETWEEN, 0, 1, subrule);
935 }
936
spec_repeat(Builder * b,int32_t argc,const Janet * argv)937 static void spec_repeat(Builder *b, int32_t argc, const Janet *argv) {
938 peg_fixarity(b, argc, 2);
939 Reserve r = reserve(b, 4);
940 int32_t n = peg_getnat(b, argv[0]);
941 uint32_t subrule = peg_compile1(b, argv[1]);
942 emit_3(r, RULE_BETWEEN, n, n, subrule);
943 }
944
945 /* Rule of the form [rule] */
spec_onerule(Builder * b,int32_t argc,const Janet * argv,uint32_t op)946 static void spec_onerule(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
947 peg_fixarity(b, argc, 1);
948 Reserve r = reserve(b, 2);
949 uint32_t rule = peg_compile1(b, argv[0]);
950 emit_1(r, op, rule);
951 }
952
spec_not(Builder * b,int32_t argc,const Janet * argv)953 static void spec_not(Builder *b, int32_t argc, const Janet *argv) {
954 spec_onerule(b, argc, argv, RULE_NOT);
955 }
spec_error(Builder * b,int32_t argc,const Janet * argv)956 static void spec_error(Builder *b, int32_t argc, const Janet *argv) {
957 if (argc == 0) {
958 Reserve r = reserve(b, 2);
959 uint32_t rule = peg_compile1(b, janet_wrap_number(0));
960 emit_1(r, RULE_ERROR, rule);
961 } else {
962 spec_onerule(b, argc, argv, RULE_ERROR);
963 }
964 }
spec_to(Builder * b,int32_t argc,const Janet * argv)965 static void spec_to(Builder *b, int32_t argc, const Janet *argv) {
966 spec_onerule(b, argc, argv, RULE_TO);
967 }
spec_thru(Builder * b,int32_t argc,const Janet * argv)968 static void spec_thru(Builder *b, int32_t argc, const Janet *argv) {
969 spec_onerule(b, argc, argv, RULE_THRU);
970 }
spec_drop(Builder * b,int32_t argc,const Janet * argv)971 static void spec_drop(Builder *b, int32_t argc, const Janet *argv) {
972 spec_onerule(b, argc, argv, RULE_DROP);
973 }
974
975 /* Rule of the form [rule, tag] */
spec_cap1(Builder * b,int32_t argc,const Janet * argv,uint32_t op)976 static void spec_cap1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
977 peg_arity(b, argc, 1, 2);
978 Reserve r = reserve(b, 3);
979 uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
980 uint32_t rule = peg_compile1(b, argv[0]);
981 emit_2(r, op, rule, tag);
982 }
983
spec_capture(Builder * b,int32_t argc,const Janet * argv)984 static void spec_capture(Builder *b, int32_t argc, const Janet *argv) {
985 spec_cap1(b, argc, argv, RULE_CAPTURE);
986 }
spec_accumulate(Builder * b,int32_t argc,const Janet * argv)987 static void spec_accumulate(Builder *b, int32_t argc, const Janet *argv) {
988 spec_cap1(b, argc, argv, RULE_ACCUMULATE);
989 }
spec_group(Builder * b,int32_t argc,const Janet * argv)990 static void spec_group(Builder *b, int32_t argc, const Janet *argv) {
991 spec_cap1(b, argc, argv, RULE_GROUP);
992 }
spec_unref(Builder * b,int32_t argc,const Janet * argv)993 static void spec_unref(Builder *b, int32_t argc, const Janet *argv) {
994 spec_cap1(b, argc, argv, RULE_UNREF);
995 }
996
spec_capture_number(Builder * b,int32_t argc,const Janet * argv)997 static void spec_capture_number(Builder *b, int32_t argc, const Janet *argv) {
998 peg_arity(b, argc, 1, 3);
999 Reserve r = reserve(b, 4);
1000 uint32_t base = 0;
1001 if (argc >= 2) {
1002 if (!janet_checktype(argv[1], JANET_NIL)) {
1003 if (!janet_checkint(argv[1])) goto error;
1004 base = (uint32_t) janet_unwrap_integer(argv[1]);
1005 if (base < 2 || base > 36) goto error;
1006 }
1007 }
1008 uint32_t tag = (argc == 3) ? emit_tag(b, argv[2]) : 0;
1009 uint32_t rule = peg_compile1(b, argv[0]);
1010 emit_3(r, RULE_CAPTURE_NUM, rule, base, tag);
1011 return;
1012 error:
1013 peg_panicf(b, "expected integer between 2 and 36, got %v", argv[2]);
1014 }
1015
spec_reference(Builder * b,int32_t argc,const Janet * argv)1016 static void spec_reference(Builder *b, int32_t argc, const Janet *argv) {
1017 peg_arity(b, argc, 1, 2);
1018 Reserve r = reserve(b, 3);
1019 uint32_t search = emit_tag(b, argv[0]);
1020 uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
1021 b->has_backref = 1;
1022 emit_2(r, RULE_GETTAG, search, tag);
1023 }
1024
spec_tag1(Builder * b,int32_t argc,const Janet * argv,uint32_t op)1025 static void spec_tag1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
1026 peg_arity(b, argc, 0, 1);
1027 Reserve r = reserve(b, 2);
1028 uint32_t tag = (argc) ? emit_tag(b, argv[0]) : 0;
1029 (void) argv;
1030 emit_1(r, op, tag);
1031 }
1032
spec_position(Builder * b,int32_t argc,const Janet * argv)1033 static void spec_position(Builder *b, int32_t argc, const Janet *argv) {
1034 spec_tag1(b, argc, argv, RULE_POSITION);
1035 }
spec_line(Builder * b,int32_t argc,const Janet * argv)1036 static void spec_line(Builder *b, int32_t argc, const Janet *argv) {
1037 spec_tag1(b, argc, argv, RULE_LINE);
1038 }
spec_column(Builder * b,int32_t argc,const Janet * argv)1039 static void spec_column(Builder *b, int32_t argc, const Janet *argv) {
1040 spec_tag1(b, argc, argv, RULE_COLUMN);
1041 }
1042
spec_backmatch(Builder * b,int32_t argc,const Janet * argv)1043 static void spec_backmatch(Builder *b, int32_t argc, const Janet *argv) {
1044 b->has_backref = 1;
1045 spec_tag1(b, argc, argv, RULE_BACKMATCH);
1046 }
1047
spec_argument(Builder * b,int32_t argc,const Janet * argv)1048 static void spec_argument(Builder *b, int32_t argc, const Janet *argv) {
1049 peg_arity(b, argc, 1, 2);
1050 Reserve r = reserve(b, 3);
1051 uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
1052 int32_t index = peg_getnat(b, argv[0]);
1053 emit_2(r, RULE_ARGUMENT, index, tag);
1054 }
1055
spec_constant(Builder * b,int32_t argc,const Janet * argv)1056 static void spec_constant(Builder *b, int32_t argc, const Janet *argv) {
1057 janet_arity(argc, 1, 2);
1058 Reserve r = reserve(b, 3);
1059 uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
1060 emit_2(r, RULE_CONSTANT, emit_constant(b, argv[0]), tag);
1061 }
1062
spec_replace(Builder * b,int32_t argc,const Janet * argv)1063 static void spec_replace(Builder *b, int32_t argc, const Janet *argv) {
1064 peg_arity(b, argc, 2, 3);
1065 Reserve r = reserve(b, 4);
1066 uint32_t subrule = peg_compile1(b, argv[0]);
1067 uint32_t constant = emit_constant(b, argv[1]);
1068 uint32_t tag = (argc == 3) ? emit_tag(b, argv[2]) : 0;
1069 emit_3(r, RULE_REPLACE, subrule, constant, tag);
1070 }
1071
spec_matchtime(Builder * b,int32_t argc,const Janet * argv)1072 static void spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
1073 peg_arity(b, argc, 2, 3);
1074 Reserve r = reserve(b, 4);
1075 uint32_t subrule = peg_compile1(b, argv[0]);
1076 Janet fun = argv[1];
1077 if (!janet_checktype(fun, JANET_FUNCTION) &&
1078 !janet_checktype(fun, JANET_CFUNCTION)) {
1079 peg_panicf(b, "expected function|cfunction, got %v", fun);
1080 }
1081 uint32_t tag = (argc == 3) ? emit_tag(b, argv[2]) : 0;
1082 uint32_t cindex = emit_constant(b, fun);
1083 emit_3(r, RULE_MATCHTIME, subrule, cindex, tag);
1084 }
1085
1086 #ifdef JANET_INT_TYPES
1087 #define JANET_MAX_READINT_WIDTH 8
1088 #else
1089 #define JANET_MAX_READINT_WIDTH 6
1090 #endif
1091
spec_readint(Builder * b,int32_t argc,const Janet * argv,uint32_t mask)1092 static void spec_readint(Builder *b, int32_t argc, const Janet *argv, uint32_t mask) {
1093 peg_arity(b, argc, 1, 2);
1094 Reserve r = reserve(b, 3);
1095 uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
1096 int32_t width = peg_getnat(b, argv[0]);
1097 if ((width < 0) || (width > JANET_MAX_READINT_WIDTH)) {
1098 peg_panicf(b, "width must be between 0 and %d, got %d", JANET_MAX_READINT_WIDTH, width);
1099 }
1100 emit_2(r, RULE_READINT, mask | ((uint32_t) width), tag);
1101 }
1102
spec_uint_le(Builder * b,int32_t argc,const Janet * argv)1103 static void spec_uint_le(Builder *b, int32_t argc, const Janet *argv) {
1104 spec_readint(b, argc, argv, 0x0u);
1105 }
spec_int_le(Builder * b,int32_t argc,const Janet * argv)1106 static void spec_int_le(Builder *b, int32_t argc, const Janet *argv) {
1107 spec_readint(b, argc, argv, 0x10u);
1108 }
spec_uint_be(Builder * b,int32_t argc,const Janet * argv)1109 static void spec_uint_be(Builder *b, int32_t argc, const Janet *argv) {
1110 spec_readint(b, argc, argv, 0x20u);
1111 }
spec_int_be(Builder * b,int32_t argc,const Janet * argv)1112 static void spec_int_be(Builder *b, int32_t argc, const Janet *argv) {
1113 spec_readint(b, argc, argv, 0x30u);
1114 }
1115
1116 /* Special compiler form */
1117 typedef void (*Special)(Builder *b, int32_t argc, const Janet *argv);
1118 typedef struct {
1119 const char *name;
1120 Special special;
1121 } SpecialPair;
1122
1123 /* Keep in lexical order (vim :sort works well) */
1124 static const SpecialPair peg_specials[] = {
1125 {"!", spec_not},
1126 {"$", spec_position},
1127 {"%", spec_accumulate},
1128 {"*", spec_sequence},
1129 {"+", spec_choice},
1130 {"->", spec_reference},
1131 {"/", spec_replace},
1132 {"<-", spec_capture},
1133 {">", spec_look},
1134 {"?", spec_opt},
1135 {"accumulate", spec_accumulate},
1136 {"any", spec_any},
1137 {"argument", spec_argument},
1138 {"at-least", spec_atleast},
1139 {"at-most", spec_atmost},
1140 {"backmatch", spec_backmatch},
1141 {"backref", spec_reference},
1142 {"between", spec_between},
1143 {"capture", spec_capture},
1144 {"choice", spec_choice},
1145 {"cmt", spec_matchtime},
1146 {"column", spec_column},
1147 {"constant", spec_constant},
1148 {"drop", spec_drop},
1149 {"error", spec_error},
1150 {"group", spec_group},
1151 {"if", spec_if},
1152 {"if-not", spec_ifnot},
1153 {"int", spec_int_le},
1154 {"int-be", spec_int_be},
1155 {"lenprefix", spec_lenprefix},
1156 {"line", spec_line},
1157 {"look", spec_look},
1158 {"not", spec_not},
1159 {"number", spec_capture_number},
1160 {"opt", spec_opt},
1161 {"position", spec_position},
1162 {"quote", spec_capture},
1163 {"range", spec_range},
1164 {"repeat", spec_repeat},
1165 {"replace", spec_replace},
1166 {"sequence", spec_sequence},
1167 {"set", spec_set},
1168 {"some", spec_some},
1169 {"thru", spec_thru},
1170 {"to", spec_to},
1171 {"uint", spec_uint_le},
1172 {"uint-be", spec_uint_be},
1173 {"unref", spec_unref},
1174 };
1175
1176 /* Compile a janet value into a rule and return the rule index. */
peg_compile1(Builder * b,Janet peg)1177 static uint32_t peg_compile1(Builder *b, Janet peg) {
1178
1179 /* Keep track of the form being compiled for error purposes */
1180 Janet old_form = b->form;
1181 JanetTable *old_grammar = b->grammar;
1182 b->form = peg;
1183
1184 /* Resolve keyword references */
1185 int i = JANET_RECURSION_GUARD;
1186 JanetTable *grammar = old_grammar;
1187 for (; i > 0 && janet_checktype(peg, JANET_KEYWORD); --i) {
1188 Janet nextPeg = janet_table_get_ex(grammar, peg, &grammar);
1189 if (!grammar || janet_checktype(nextPeg, JANET_NIL)) {
1190 nextPeg = (b->default_grammar == NULL)
1191 ? janet_wrap_nil()
1192 : janet_table_get(b->default_grammar, peg);
1193 if (janet_checktype(nextPeg, JANET_NIL)) {
1194 peg_panic(b, "unknown rule");
1195 }
1196 }
1197 peg = nextPeg;
1198 b->form = peg;
1199 b->grammar = grammar;
1200 }
1201 if (i == 0)
1202 peg_panic(b, "reference chain too deep");
1203
1204 /* Check cache - for tuples we check only the local cache, as
1205 * in a different grammar, the same tuple can compile to a different
1206 * rule - for example, (+ :a :b) depends on whatever :a and :b are bound to. */
1207 Janet check = janet_checktype(peg, JANET_TUPLE)
1208 ? janet_table_rawget(grammar, peg)
1209 : janet_table_get(grammar, peg);
1210 if (!janet_checktype(check, JANET_NIL)) {
1211 b->form = old_form;
1212 b->grammar = old_grammar;
1213 return (uint32_t) janet_unwrap_number(check);
1214 }
1215
1216 /* Check depth */
1217 if (b->depth-- == 0)
1218 peg_panic(b, "peg grammar recursed too deeply");
1219
1220 /* The final rule to return */
1221 uint32_t rule = janet_v_count(b->bytecode);
1222
1223 /* Add to cache. Do not cache structs, as we don't yet know
1224 * what rule they will return! We can just as effectively cache
1225 * the structs main rule. */
1226 if (!janet_checktype(peg, JANET_STRUCT)) {
1227 JanetTable *which_grammar = grammar;
1228 /* If we are a primitive pattern, add to the global cache (root grammar table) */
1229 if (!janet_checktype(peg, JANET_TUPLE)) {
1230 while (which_grammar->proto)
1231 which_grammar = which_grammar->proto;
1232 }
1233 janet_table_put(which_grammar, peg, janet_wrap_number(rule));
1234 }
1235
1236 switch (janet_type(peg)) {
1237 default:
1238 peg_panic(b, "unexpected peg source");
1239 return 0;
1240 case JANET_NUMBER: {
1241 int32_t n = peg_getinteger(b, peg);
1242 Reserve r = reserve(b, 2);
1243 if (n < 0) {
1244 emit_1(r, RULE_NOTNCHAR, -n);
1245 } else {
1246 emit_1(r, RULE_NCHAR, n);
1247 }
1248 break;
1249 }
1250 case JANET_STRING: {
1251 const uint8_t *str = janet_unwrap_string(peg);
1252 int32_t len = janet_string_length(str);
1253 emit_bytes(b, RULE_LITERAL, len, str);
1254 break;
1255 }
1256 case JANET_TABLE: {
1257 /* Build grammar table */
1258 JanetTable *new_grammar = janet_table_clone(janet_unwrap_table(peg));
1259 new_grammar->proto = grammar;
1260 b->grammar = grammar = new_grammar;
1261 /* Run the main rule */
1262 Janet main_rule = janet_table_rawget(grammar, janet_ckeywordv("main"));
1263 if (janet_checktype(main_rule, JANET_NIL))
1264 peg_panic(b, "grammar requires :main rule");
1265 rule = peg_compile1(b, main_rule);
1266 break;
1267 }
1268 case JANET_STRUCT: {
1269 /* Build grammar table */
1270 const JanetKV *st = janet_unwrap_struct(peg);
1271 JanetTable *new_grammar = janet_table(2 * janet_struct_capacity(st));
1272 for (int32_t i = 0; i < janet_struct_capacity(st); i++) {
1273 if (janet_checktype(st[i].key, JANET_KEYWORD)) {
1274 janet_table_put(new_grammar, st[i].key, st[i].value);
1275 }
1276 }
1277 new_grammar->proto = grammar;
1278 b->grammar = grammar = new_grammar;
1279 /* Run the main rule */
1280 Janet main_rule = janet_table_rawget(grammar, janet_ckeywordv("main"));
1281 if (janet_checktype(main_rule, JANET_NIL))
1282 peg_panic(b, "grammar requires :main rule");
1283 rule = peg_compile1(b, main_rule);
1284 break;
1285 }
1286 case JANET_TUPLE: {
1287 const Janet *tup = janet_unwrap_tuple(peg);
1288 int32_t len = janet_tuple_length(tup);
1289 if (len == 0) peg_panic(b, "tuple in grammar must have non-zero length");
1290 if (janet_checkint(tup[0])) {
1291 int32_t n = janet_unwrap_integer(tup[0]);
1292 if (n < 0) {
1293 peg_panicf(b, "expected non-negative integer, got %d", n);
1294 }
1295 spec_repeat(b, len, tup);
1296 break;
1297 }
1298 if (!janet_checktype(tup[0], JANET_SYMBOL))
1299 peg_panicf(b, "expected grammar command, found %v", tup[0]);
1300 const uint8_t *sym = janet_unwrap_symbol(tup[0]);
1301 const SpecialPair *sp = janet_strbinsearch(
1302 &peg_specials,
1303 sizeof(peg_specials) / sizeof(SpecialPair),
1304 sizeof(SpecialPair),
1305 sym);
1306 if (sp) {
1307 sp->special(b, len - 1, tup + 1);
1308 } else {
1309 peg_panicf(b, "unknown special %S", sym);
1310 }
1311 break;
1312 }
1313 }
1314
1315 /* Increase depth again */
1316 b->depth++;
1317 b->form = old_form;
1318 b->grammar = old_grammar;
1319 return rule;
1320 }
1321
1322 /*
1323 * Post-Compilation
1324 */
1325
peg_mark(void * p,size_t size)1326 static int peg_mark(void *p, size_t size) {
1327 (void) size;
1328 JanetPeg *peg = (JanetPeg *)p;
1329 if (NULL != peg->constants)
1330 for (uint32_t i = 0; i < peg->num_constants; i++)
1331 janet_mark(peg->constants[i]);
1332 return 0;
1333 }
1334
peg_marshal(void * p,JanetMarshalContext * ctx)1335 static void peg_marshal(void *p, JanetMarshalContext *ctx) {
1336 JanetPeg *peg = (JanetPeg *)p;
1337 janet_marshal_size(ctx, peg->bytecode_len);
1338 janet_marshal_int(ctx, (int32_t)peg->num_constants);
1339 janet_marshal_abstract(ctx, p);
1340 for (size_t i = 0; i < peg->bytecode_len; i++)
1341 janet_marshal_int(ctx, (int32_t) peg->bytecode[i]);
1342 for (uint32_t j = 0; j < peg->num_constants; j++)
1343 janet_marshal_janet(ctx, peg->constants[j]);
1344 }
1345
1346 /* Used to ensure that if we place several arrays in one memory chunk, each
1347 * array will be correctly aligned */
size_padded(size_t offset,size_t size)1348 static size_t size_padded(size_t offset, size_t size) {
1349 size_t x = size + offset - 1;
1350 return x - (x % size);
1351 }
1352
peg_unmarshal(JanetMarshalContext * ctx)1353 static void *peg_unmarshal(JanetMarshalContext *ctx) {
1354 size_t bytecode_len = janet_unmarshal_size(ctx);
1355 uint32_t num_constants = (uint32_t) janet_unmarshal_int(ctx);
1356
1357 /* Calculate offsets. Should match those in make_peg */
1358 size_t bytecode_start = size_padded(sizeof(JanetPeg), sizeof(uint32_t));
1359 size_t bytecode_size = bytecode_len * sizeof(uint32_t);
1360 size_t constants_start = size_padded(bytecode_start + bytecode_size, sizeof(Janet));
1361 size_t total_size = constants_start + sizeof(Janet) * (size_t) num_constants;
1362
1363 /* DOS prevention? I.E. we could read bytecode and constants before
1364 * hand so we don't allocated a ton of memory on bad, short input */
1365
1366 /* Allocate PEG */
1367 char *mem = janet_unmarshal_abstract(ctx, total_size);
1368 JanetPeg *peg = (JanetPeg *)mem;
1369 uint32_t *bytecode = (uint32_t *)(mem + bytecode_start);
1370 Janet *constants = (Janet *)(mem + constants_start);
1371 peg->bytecode = NULL;
1372 peg->constants = NULL;
1373 peg->bytecode_len = bytecode_len;
1374 peg->num_constants = num_constants;
1375
1376 for (size_t i = 0; i < peg->bytecode_len; i++)
1377 bytecode[i] = (uint32_t) janet_unmarshal_int(ctx);
1378 for (uint32_t j = 0; j < peg->num_constants; j++)
1379 constants[j] = janet_unmarshal_janet(ctx);
1380
1381 /* After here, no panics except for the bad: label. */
1382
1383 /* Keep track at each index if an instruction was
1384 * reference (0x01) or is in a main bytecode position
1385 * (0x02). This lets us do a linear scan and not
1386 * need to a depth first traversal. It is stricter
1387 * than a dfs by not allowing certain kinds of unused
1388 * bytecode. */
1389 uint32_t blen = (int32_t) peg->bytecode_len;
1390 uint32_t clen = peg->num_constants;
1391 uint8_t *op_flags = janet_calloc(1, blen);
1392 if (NULL == op_flags) {
1393 JANET_OUT_OF_MEMORY;
1394 }
1395
1396 /* verify peg bytecode */
1397 int32_t has_backref = 0;
1398 uint32_t i = 0;
1399 while (i < blen) {
1400 uint32_t instr = bytecode[i];
1401 uint32_t *rule = bytecode + i;
1402 op_flags[i] |= 0x02;
1403 switch (instr & 0x1F) {
1404 case RULE_LITERAL:
1405 i += 2 + ((rule[1] + 3) >> 2);
1406 break;
1407 case RULE_NCHAR:
1408 case RULE_NOTNCHAR:
1409 case RULE_RANGE:
1410 case RULE_POSITION:
1411 case RULE_LINE:
1412 case RULE_COLUMN:
1413 /* [1 word] */
1414 i += 2;
1415 break;
1416 case RULE_BACKMATCH:
1417 /* [1 word] */
1418 i += 2;
1419 has_backref = 1;
1420 break;
1421 case RULE_SET:
1422 /* [8 words] */
1423 i += 9;
1424 break;
1425 case RULE_LOOK:
1426 /* [offset, rule] */
1427 if (rule[2] >= blen) goto bad;
1428 op_flags[rule[2]] |= 0x1;
1429 i += 3;
1430 break;
1431 case RULE_CHOICE:
1432 case RULE_SEQUENCE:
1433 /* [len, rules...] */
1434 {
1435 uint32_t len = rule[1];
1436 for (uint32_t j = 0; j < len; j++) {
1437 if (rule[2 + j] >= blen) goto bad;
1438 op_flags[rule[2 + j]] |= 0x1;
1439 }
1440 i += 2 + len;
1441 }
1442 break;
1443 case RULE_IF:
1444 case RULE_IFNOT:
1445 case RULE_LENPREFIX:
1446 /* [rule_a, rule_b (b if not a)] */
1447 if (rule[1] >= blen) goto bad;
1448 if (rule[2] >= blen) goto bad;
1449 op_flags[rule[1]] |= 0x01;
1450 op_flags[rule[2]] |= 0x01;
1451 i += 3;
1452 break;
1453 case RULE_BETWEEN:
1454 /* [lo, hi, rule] */
1455 if (rule[3] >= blen) goto bad;
1456 op_flags[rule[3]] |= 0x01;
1457 i += 4;
1458 break;
1459 case RULE_ARGUMENT:
1460 /* [searchtag, tag] */
1461 i += 3;
1462 break;
1463 case RULE_GETTAG:
1464 /* [searchtag, tag] */
1465 i += 3;
1466 has_backref = 1;
1467 break;
1468 case RULE_CONSTANT:
1469 /* [constant, tag] */
1470 if (rule[1] >= clen) goto bad;
1471 i += 3;
1472 break;
1473 case RULE_CAPTURE_NUM:
1474 /* [rule, base, tag] */
1475 if (rule[1] >= blen) goto bad;
1476 op_flags[rule[1]] |= 0x01;
1477 i += 4;
1478 break;
1479 case RULE_ACCUMULATE:
1480 case RULE_GROUP:
1481 case RULE_CAPTURE:
1482 case RULE_UNREF:
1483 /* [rule, tag] */
1484 if (rule[1] >= blen) goto bad;
1485 op_flags[rule[1]] |= 0x01;
1486 i += 3;
1487 break;
1488 case RULE_REPLACE:
1489 case RULE_MATCHTIME:
1490 /* [rule, constant, tag] */
1491 if (rule[1] >= blen) goto bad;
1492 if (rule[2] >= clen) goto bad;
1493 op_flags[rule[1]] |= 0x01;
1494 i += 4;
1495 break;
1496 case RULE_ERROR:
1497 case RULE_DROP:
1498 case RULE_NOT:
1499 case RULE_TO:
1500 case RULE_THRU:
1501 /* [rule] */
1502 if (rule[1] >= blen) goto bad;
1503 op_flags[rule[1]] |= 0x01;
1504 i += 2;
1505 break;
1506 case RULE_READINT:
1507 /* [ width | (endianess << 5) | (signedness << 6), tag ] */
1508 if (rule[1] > JANET_MAX_READINT_WIDTH) goto bad;
1509 i += 3;
1510 break;
1511 default:
1512 goto bad;
1513 }
1514 }
1515
1516 /* last instruction cannot overflow */
1517 if (i != blen) goto bad;
1518
1519 /* Make sure all referenced instructions are actually
1520 * in instruction positions. */
1521 for (i = 0; i < blen; i++)
1522 if (op_flags[i] == 0x01) goto bad;
1523
1524 /* Good return */
1525 peg->bytecode = bytecode;
1526 peg->constants = constants;
1527 peg->has_backref = has_backref;
1528 janet_free(op_flags);
1529 return peg;
1530
1531 bad:
1532 janet_free(op_flags);
1533 janet_panic("invalid peg bytecode");
1534 }
1535
1536 static int cfun_peg_getter(JanetAbstract a, Janet key, Janet *out);
1537 static Janet peg_next(void *p, Janet key);
1538
1539 const JanetAbstractType janet_peg_type = {
1540 "core/peg",
1541 NULL,
1542 peg_mark,
1543 cfun_peg_getter,
1544 NULL, /* put */
1545 peg_marshal,
1546 peg_unmarshal,
1547 NULL, /* tostring */
1548 NULL, /* compare */
1549 NULL, /* hash */
1550 peg_next,
1551 JANET_ATEND_NEXT
1552 };
1553
1554 /* Convert Builder to JanetPeg (Janet Abstract Value) */
make_peg(Builder * b)1555 static JanetPeg *make_peg(Builder *b) {
1556 size_t bytecode_start = size_padded(sizeof(JanetPeg), sizeof(uint32_t));
1557 size_t bytecode_size = janet_v_count(b->bytecode) * sizeof(uint32_t);
1558 size_t constants_start = size_padded(bytecode_start + bytecode_size, sizeof(Janet));
1559 size_t constants_size = janet_v_count(b->constants) * sizeof(Janet);
1560 size_t total_size = constants_start + constants_size;
1561 char *mem = janet_abstract(&janet_peg_type, total_size);
1562 JanetPeg *peg = (JanetPeg *)mem;
1563 peg->bytecode = (uint32_t *)(mem + bytecode_start);
1564 peg->constants = (Janet *)(mem + constants_start);
1565 peg->num_constants = janet_v_count(b->constants);
1566 safe_memcpy(peg->bytecode, b->bytecode, bytecode_size);
1567 safe_memcpy(peg->constants, b->constants, constants_size);
1568 peg->bytecode_len = janet_v_count(b->bytecode);
1569 peg->has_backref = b->has_backref;
1570 return peg;
1571 }
1572
1573 /* Compiler entry point */
compile_peg(Janet x)1574 static JanetPeg *compile_peg(Janet x) {
1575 Builder builder;
1576 builder.grammar = janet_table(0);
1577 builder.default_grammar = NULL;
1578 {
1579 Janet default_grammarv = janet_dyn("peg-grammar");
1580 if (janet_checktype(default_grammarv, JANET_TABLE)) {
1581 builder.default_grammar = janet_unwrap_table(default_grammarv);
1582 }
1583 }
1584 builder.tags = janet_table(0);
1585 builder.constants = NULL;
1586 builder.bytecode = NULL;
1587 builder.nexttag = 1;
1588 builder.form = x;
1589 builder.depth = JANET_RECURSION_GUARD;
1590 builder.has_backref = 0;
1591 peg_compile1(&builder, x);
1592 JanetPeg *peg = make_peg(&builder);
1593 builder_cleanup(&builder);
1594 return peg;
1595 }
1596
1597 /*
1598 * C Functions
1599 */
1600
1601 JANET_CORE_FN(cfun_peg_compile,
1602 "(peg/compile peg)",
1603 "Compiles a peg source data structure into a <core/peg>. This will speed up matching "
1604 "if the same peg will be used multiple times. Will also use `(dyn :peg-grammar)` to suppliment "
1605 "the grammar of the peg for otherwise undefined peg keywords.") {
1606 janet_fixarity(argc, 1);
1607 JanetPeg *peg = compile_peg(argv[0]);
1608 return janet_wrap_abstract(peg);
1609 }
1610
1611 /* Common data for peg cfunctions */
1612 typedef struct {
1613 JanetPeg *peg;
1614 PegState s;
1615 JanetByteView bytes;
1616 JanetByteView repl;
1617 int32_t start;
1618 } PegCall;
1619
1620 /* Initialize state for peg cfunctions */
peg_cfun_init(int32_t argc,Janet * argv,int get_replace)1621 static PegCall peg_cfun_init(int32_t argc, Janet *argv, int get_replace) {
1622 PegCall ret;
1623 int32_t min = get_replace ? 3 : 2;
1624 janet_arity(argc, get_replace, -1);
1625 if (janet_checktype(argv[0], JANET_ABSTRACT) &&
1626 janet_abstract_type(janet_unwrap_abstract(argv[0])) == &janet_peg_type) {
1627 ret.peg = janet_unwrap_abstract(argv[0]);
1628 } else {
1629 ret.peg = compile_peg(argv[0]);
1630 }
1631 if (get_replace) {
1632 ret.repl = janet_getbytes(argv, 1);
1633 ret.bytes = janet_getbytes(argv, 2);
1634 } else {
1635 ret.bytes = janet_getbytes(argv, 1);
1636 }
1637 if (argc > min) {
1638 ret.start = janet_gethalfrange(argv, min, ret.bytes.len, "offset");
1639 ret.s.extrac = argc - min - 1;
1640 ret.s.extrav = janet_tuple_n(argv + min + 1, argc - min - 1);
1641 } else {
1642 ret.start = 0;
1643 ret.s.extrac = 0;
1644 ret.s.extrav = NULL;
1645 }
1646 ret.s.mode = PEG_MODE_NORMAL;
1647 ret.s.text_start = ret.bytes.bytes;
1648 ret.s.text_end = ret.bytes.bytes + ret.bytes.len;
1649 ret.s.depth = JANET_RECURSION_GUARD;
1650 ret.s.captures = janet_array(0);
1651 ret.s.tagged_captures = janet_array(0);
1652 ret.s.scratch = janet_buffer(10);
1653 ret.s.tags = janet_buffer(10);
1654 ret.s.constants = ret.peg->constants;
1655 ret.s.bytecode = ret.peg->bytecode;
1656 ret.s.linemap = NULL;
1657 ret.s.linemaplen = -1;
1658 ret.s.has_backref = ret.peg->has_backref;
1659 return ret;
1660 }
1661
peg_call_reset(PegCall * c)1662 static void peg_call_reset(PegCall *c) {
1663 c->s.captures->count = 0;
1664 c->s.scratch->count = 0;
1665 c->s.tags->count = 0;
1666 }
1667
1668 JANET_CORE_FN(cfun_peg_match,
1669 "(peg/match peg text &opt start & args)",
1670 "Match a Parsing Expression Grammar to a byte string and return an array of captured values. "
1671 "Returns nil if text does not match the language defined by peg. The syntax of PEGs is documented on the Janet website.") {
1672 PegCall c = peg_cfun_init(argc, argv, 0);
1673 const uint8_t *result = peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + c.start);
1674 return result ? janet_wrap_array(c.s.captures) : janet_wrap_nil();
1675 }
1676
1677 JANET_CORE_FN(cfun_peg_find,
1678 "(peg/find peg text &opt start & args)",
1679 "Find first index where the peg matches in text. Returns an integer, or nil if not found.") {
1680 PegCall c = peg_cfun_init(argc, argv, 0);
1681 for (int32_t i = c.start; i < c.bytes.len; i++) {
1682 peg_call_reset(&c);
1683 if (peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i))
1684 return janet_wrap_integer(i);
1685 }
1686 return janet_wrap_nil();
1687 }
1688
1689 JANET_CORE_FN(cfun_peg_find_all,
1690 "(peg/find-all peg text &opt start & args)",
1691 "Find all indexes where the peg matches in text. Returns an array of integers.") {
1692 PegCall c = peg_cfun_init(argc, argv, 0);
1693 JanetArray *ret = janet_array(0);
1694 for (int32_t i = c.start; i < c.bytes.len; i++) {
1695 peg_call_reset(&c);
1696 if (peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i))
1697 janet_array_push(ret, janet_wrap_integer(i));
1698 }
1699 return janet_wrap_array(ret);
1700 }
1701
cfun_peg_replace_generic(int32_t argc,Janet * argv,int only_one)1702 static Janet cfun_peg_replace_generic(int32_t argc, Janet *argv, int only_one) {
1703 PegCall c = peg_cfun_init(argc, argv, 1);
1704 JanetBuffer *ret = janet_buffer(0);
1705 int32_t trail = 0;
1706 for (int32_t i = c.start; i < c.bytes.len;) {
1707 peg_call_reset(&c);
1708 const uint8_t *result = peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i);
1709 if (NULL != result) {
1710 if (trail < i) {
1711 janet_buffer_push_bytes(ret, c.bytes.bytes + trail, (i - trail));
1712 trail = i;
1713 }
1714 int32_t nexti = (int32_t)(result - c.bytes.bytes);
1715 janet_buffer_push_bytes(ret, c.repl.bytes, c.repl.len);
1716 trail = nexti;
1717 if (nexti == i) nexti++;
1718 i = nexti;
1719 if (only_one) break;
1720 } else {
1721 i++;
1722 }
1723 }
1724 if (trail < c.bytes.len) {
1725 janet_buffer_push_bytes(ret, c.bytes.bytes + trail, (c.bytes.len - trail));
1726 }
1727 return janet_wrap_buffer(ret);
1728 }
1729
1730 JANET_CORE_FN(cfun_peg_replace_all,
1731 "(peg/replace-all peg repl text &opt start & args)",
1732 "Replace all matches of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement.") {
1733 return cfun_peg_replace_generic(argc, argv, 0);
1734 }
1735
1736 JANET_CORE_FN(cfun_peg_replace,
1737 "(peg/replace peg repl text &opt start & args)",
1738 "Replace first match of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement. "
1739 "If no matches are found, returns the input string in a new buffer.") {
1740 return cfun_peg_replace_generic(argc, argv, 1);
1741 }
1742
1743 static JanetMethod peg_methods[] = {
1744 {"match", cfun_peg_match},
1745 {"find", cfun_peg_find},
1746 {"find-all", cfun_peg_find_all},
1747 {"replace", cfun_peg_replace},
1748 {"replace-all", cfun_peg_replace_all},
1749 {NULL, NULL}
1750 };
1751
cfun_peg_getter(JanetAbstract a,Janet key,Janet * out)1752 static int cfun_peg_getter(JanetAbstract a, Janet key, Janet *out) {
1753 (void) a;
1754 if (!janet_checktype(key, JANET_KEYWORD))
1755 return 0;
1756 return janet_getmethod(janet_unwrap_keyword(key), peg_methods, out);
1757 }
1758
peg_next(void * p,Janet key)1759 static Janet peg_next(void *p, Janet key) {
1760 (void) p;
1761 return janet_nextmethod(peg_methods, key);
1762 }
1763
1764 /* Load the peg module */
janet_lib_peg(JanetTable * env)1765 void janet_lib_peg(JanetTable *env) {
1766 JanetRegExt cfuns[] = {
1767 JANET_CORE_REG("peg/compile", cfun_peg_compile),
1768 JANET_CORE_REG("peg/match", cfun_peg_match),
1769 JANET_CORE_REG("peg/find", cfun_peg_find),
1770 JANET_CORE_REG("peg/find-all", cfun_peg_find_all),
1771 JANET_CORE_REG("peg/replace", cfun_peg_replace),
1772 JANET_CORE_REG("peg/replace-all", cfun_peg_replace_all),
1773 JANET_REG_END
1774 };
1775 janet_core_cfuns_ext(env, NULL, cfuns);
1776 janet_register_abstract_type(&janet_peg_type);
1777 }
1778
1779 #endif /* ifdef JANET_PEG */
1780