1 /*
2 * Copyright (c) 2021 Calvin Rose
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to
6 * deal in the Software without restriction, including without limitation the
7 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 * sell copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 * IN THE SOFTWARE.
21 */
22 
23 #ifndef JANET_AMALG
24 #include "features.h"
25 #include <janet.h>
26 #include <string.h>
27 #include "util.h"
28 #include "vector.h"
29 #include "util.h"
30 #endif
31 
32 #ifdef JANET_PEG
33 
34 /*
35  * Runtime
36  */
37 
38 /* Hold captured patterns and match state */
39 typedef struct {
40     const uint8_t *text_start;
41     const uint8_t *text_end;
42     const uint32_t *bytecode;
43     const Janet *constants;
44     JanetArray *captures;
45     JanetBuffer *scratch;
46     JanetBuffer *tags;
47     JanetArray *tagged_captures;
48     const Janet *extrav;
49     int32_t *linemap;
50     int32_t extrac;
51     int32_t depth;
52     int32_t linemaplen;
53     int32_t has_backref;
54     enum {
55         PEG_MODE_NORMAL,
56         PEG_MODE_ACCUMULATE
57     } mode;
58 } PegState;
59 
60 /* Allow backtrack with captures. We need
61  * to save state at branches, and then reload
62  * if one branch fails and try a new branch. */
63 typedef struct {
64     int32_t cap;
65     int32_t tcap;
66     int32_t scratch;
67 } CapState;
68 
69 /* Save the current capture state */
cap_save(PegState * s)70 static CapState cap_save(PegState *s) {
71     CapState cs;
72     cs.scratch = s->scratch->count;
73     cs.cap = s->captures->count;
74     cs.tcap = s->tagged_captures->count;
75     return cs;
76 }
77 
78 /* Load a saved capture state in the case of failure */
cap_load(PegState * s,CapState cs)79 static void cap_load(PegState *s, CapState cs) {
80     s->scratch->count = cs.scratch;
81     s->captures->count = cs.cap;
82     s->tags->count = cs.tcap;
83     s->tagged_captures->count = cs.tcap;
84 }
85 
86 /* Load a saved capture state in the case of success. Keeps
87  * tagged captures around for backref. */
cap_load_keept(PegState * s,CapState cs)88 static void cap_load_keept(PegState *s, CapState cs) {
89     s->scratch->count = cs.scratch;
90     s->captures->count = cs.cap;
91 }
92 
93 /* Add a capture */
pushcap(PegState * s,Janet capture,uint32_t tag)94 static void pushcap(PegState *s, Janet capture, uint32_t tag) {
95     if (s->mode == PEG_MODE_ACCUMULATE) {
96         janet_to_string_b(s->scratch, capture);
97     }
98     if (s->mode == PEG_MODE_NORMAL) {
99         janet_array_push(s->captures, capture);
100     }
101     if (s->has_backref) {
102         janet_array_push(s->tagged_captures, capture);
103         janet_buffer_push_u8(s->tags, tag);
104     }
105 }
106 
107 /* Lazily generate line map to get line and column information for PegState.
108  * line and column are 1-indexed. */
109 typedef struct {
110     int32_t line;
111     int32_t col;
112 } LineCol;
get_linecol_from_position(PegState * s,int32_t position)113 static LineCol get_linecol_from_position(PegState *s, int32_t position) {
114     /* Generate if not made yet */
115     if (s->linemaplen < 0) {
116         int32_t newline_count = 0;
117         for (const uint8_t *c = s->text_start; c < s->text_end; c++) {
118             if (*c == '\n') newline_count++;
119         }
120         int32_t *mem = janet_smalloc(sizeof(int32_t) * newline_count);
121         size_t index = 0;
122         for (const uint8_t *c = s->text_start; c < s->text_end; c++) {
123             if (*c == '\n') mem[index++] = (int32_t)(c - s->text_start);
124         }
125         s->linemaplen = newline_count;
126         s->linemap = mem;
127     }
128     /* Do binary search for line. Slightly modified from classic binary search:
129      * - if we find that our current character is a line break, just return immediately.
130      *   a newline character is consider to be on the same line as the character before
131      *   (\n is line terminator, not line separator).
132      * - in the not-found case, we still want to find the greatest-indexed newline that
133      *   is before position. we use that to calcuate the line and column.
134      * - in the case that lo = 0 and s->linemap[0] is still greater than position, we
135      *   are on the first line and our column is position + 1. */
136     int32_t hi = s->linemaplen; /* hi is greater than the actual line */
137     int32_t lo = 0; /* lo is less than or equal to the actual line */
138     LineCol ret;
139     while (lo + 1 < hi) {
140         int32_t mid = lo + (hi - lo) / 2;
141         if (s->linemap[mid] >= position) {
142             hi = mid;
143         } else {
144             lo = mid;
145         }
146     }
147     /* first line case */
148     if (s->linemaplen == 0 || (lo == 0 && s->linemap[0] >= position)) {
149         ret.line = 1;
150         ret.col = position + 1;
151     } else {
152         ret.line = lo + 2;
153         ret.col = position - s->linemap[lo];
154     }
155     return ret;
156 }
157 
158 /* Convert a uint64_t to a int64_t by wrapping to a maximum number of bytes */
peg_convert_u64_s64(uint64_t from,int width)159 static int64_t peg_convert_u64_s64(uint64_t from, int width) {
160     int shift = 8 * (8 - width);
161     return ((int64_t)(from << shift)) >> shift;
162 }
163 
164 /* Prevent stack overflow */
165 #define down1(s) do { \
166     if (0 == --((s)->depth)) janet_panic("peg/match recursed too deeply"); \
167 } while (0)
168 #define up1(s) ((s)->depth++)
169 
170 /* Evaluate a peg rule
171  * Pre-conditions: s is in a valid state
172  * Post-conditions: If there is a match, returns a pointer to the next text.
173  * All captures on the capture stack are valid. If there is no match,
174  * returns NULL. Extra captures from successful child expressions can be
175  * left on the capture stack.
176  */
peg_rule(PegState * s,const uint32_t * rule,const uint8_t * text)177 static const uint8_t *peg_rule(
178     PegState *s,
179     const uint32_t *rule,
180     const uint8_t *text) {
181 tail:
182     switch (*rule & 0x1F) {
183         default:
184             janet_panic("unexpected opcode");
185             return NULL;
186 
187         case RULE_LITERAL: {
188             uint32_t len = rule[1];
189             if (text + len > s->text_end) return NULL;
190             return memcmp(text, rule + 2, len) ? NULL : text + len;
191         }
192 
193         case RULE_NCHAR: {
194             uint32_t n = rule[1];
195             return (text + n > s->text_end) ? NULL : text + n;
196         }
197 
198         case RULE_NOTNCHAR: {
199             uint32_t n = rule[1];
200             return (text + n > s->text_end) ? text : NULL;
201         }
202 
203         case RULE_RANGE: {
204             uint8_t lo = rule[1] & 0xFF;
205             uint8_t hi = (rule[1] >> 16) & 0xFF;
206             return (text < s->text_end &&
207                     text[0] >= lo &&
208                     text[0] <= hi)
209                    ? text + 1
210                    : NULL;
211         }
212 
213         case RULE_SET: {
214             uint32_t word = rule[1 + (text[0] >> 5)];
215             uint32_t mask = (uint32_t)1 << (text[0] & 0x1F);
216             return (text < s->text_end && (word & mask))
217                    ? text + 1
218                    : NULL;
219         }
220 
221         case RULE_LOOK: {
222             text += ((int32_t *)rule)[1];
223             if (text < s->text_start || text > s->text_end) return NULL;
224             down1(s);
225             const uint8_t *result = peg_rule(s, s->bytecode + rule[2], text);
226             up1(s);
227             text -= ((int32_t *)rule)[1];
228             return result ? text : NULL;
229         }
230 
231         case RULE_CHOICE: {
232             uint32_t len = rule[1];
233             const uint32_t *args = rule + 2;
234             if (len == 0) return NULL;
235             down1(s);
236             CapState cs = cap_save(s);
237             for (uint32_t i = 0; i < len - 1; i++) {
238                 const uint8_t *result = peg_rule(s, s->bytecode + args[i], text);
239                 if (result) {
240                     up1(s);
241                     return result;
242                 }
243                 cap_load(s, cs);
244             }
245             up1(s);
246             rule = s->bytecode + args[len - 1];
247             goto tail;
248         }
249 
250         case RULE_SEQUENCE: {
251             uint32_t len = rule[1];
252             const uint32_t *args = rule + 2;
253             if (len == 0) return text;
254             down1(s);
255             for (uint32_t i = 0; text && i < len - 1; i++)
256                 text = peg_rule(s, s->bytecode + args[i], text);
257             up1(s);
258             if (!text) return NULL;
259             rule = s->bytecode + args[len - 1];
260             goto tail;
261         }
262 
263         case RULE_IF:
264         case RULE_IFNOT: {
265             const uint32_t *rule_a = s->bytecode + rule[1];
266             const uint32_t *rule_b = s->bytecode + rule[2];
267             down1(s);
268             const uint8_t *result = peg_rule(s, rule_a, text);
269             up1(s);
270             if (rule[0] == RULE_IF ? !result : !!result) return NULL;
271             rule = rule_b;
272             goto tail;
273         }
274 
275         case RULE_NOT: {
276             const uint32_t *rule_a = s->bytecode + rule[1];
277             down1(s);
278             const uint8_t *result = peg_rule(s, rule_a, text);
279             up1(s);
280             return (result) ? NULL : text;
281         }
282 
283         case RULE_THRU:
284         case RULE_TO: {
285             const uint32_t *rule_a = s->bytecode + rule[1];
286             const uint8_t *next_text;
287             CapState cs = cap_save(s);
288             down1(s);
289             while (text <= s->text_end) {
290                 CapState cs2 = cap_save(s);
291                 next_text = peg_rule(s, rule_a, text);
292                 if (next_text) {
293                     if (rule[0] == RULE_TO) cap_load(s, cs2);
294                     break;
295                 }
296                 text++;
297             }
298             up1(s);
299             if (text > s->text_end) {
300                 cap_load(s, cs);
301                 return NULL;
302             }
303             return rule[0] == RULE_TO ? text : next_text;
304         }
305 
306         case RULE_BETWEEN: {
307             uint32_t lo = rule[1];
308             uint32_t hi = rule[2];
309             const uint32_t *rule_a = s->bytecode + rule[3];
310             uint32_t captured = 0;
311             const uint8_t *next_text;
312             CapState cs = cap_save(s);
313             down1(s);
314             while (captured < hi) {
315                 CapState cs2 = cap_save(s);
316                 next_text = peg_rule(s, rule_a, text);
317                 if (!next_text || next_text == text) {
318                     cap_load(s, cs2);
319                     break;
320                 }
321                 captured++;
322                 text = next_text;
323             }
324             up1(s);
325             if (captured < lo) {
326                 cap_load(s, cs);
327                 return NULL;
328             }
329             return text;
330         }
331 
332         /* Capturing rules */
333 
334         case RULE_GETTAG: {
335             uint32_t search = rule[1];
336             uint32_t tag = rule[2];
337             for (int32_t i = s->tags->count - 1; i >= 0; i--) {
338                 if (s->tags->data[i] == search) {
339                     pushcap(s, s->tagged_captures->data[i], tag);
340                     return text;
341                 }
342             }
343             return NULL;
344         }
345 
346         case RULE_POSITION: {
347             pushcap(s, janet_wrap_number((double)(text - s->text_start)), rule[1]);
348             return text;
349         }
350 
351         case RULE_LINE: {
352             LineCol lc = get_linecol_from_position(s, (int32_t)(text - s->text_start));
353             pushcap(s, janet_wrap_number((double)(lc.line)), rule[1]);
354             return text;
355         }
356 
357         case RULE_COLUMN: {
358             LineCol lc = get_linecol_from_position(s, (int32_t)(text - s->text_start));
359             pushcap(s, janet_wrap_number((double)(lc.col)), rule[1]);
360             return text;
361         }
362 
363         case RULE_ARGUMENT: {
364             int32_t index = ((int32_t *)rule)[1];
365             Janet capture = (index >= s->extrac) ? janet_wrap_nil() : s->extrav[index];
366             pushcap(s, capture, rule[2]);
367             return text;
368         }
369 
370         case RULE_CONSTANT: {
371             pushcap(s, s->constants[rule[1]], rule[2]);
372             return text;
373         }
374 
375         case RULE_CAPTURE: {
376             down1(s);
377             const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
378             up1(s);
379             if (!result) return NULL;
380             /* Specialized pushcap - avoid intermediate string creation */
381             if (!s->has_backref && s->mode == PEG_MODE_ACCUMULATE) {
382                 janet_buffer_push_bytes(s->scratch, text, (int32_t)(result - text));
383             } else {
384                 uint32_t tag = rule[2];
385                 pushcap(s, janet_stringv(text, (int32_t)(result - text)), tag);
386             }
387             return result;
388         }
389 
390         case RULE_CAPTURE_NUM: {
391             down1(s);
392             const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
393             up1(s);
394             if (!result) return NULL;
395             /* check number parsing */
396             double x = 0.0;
397             int32_t base = (int32_t) rule[2];
398             if (janet_scan_number_base(text, (int32_t)(result - text), base, &x)) return NULL;
399             /* Specialized pushcap - avoid intermediate string creation */
400             if (!s->has_backref && s->mode == PEG_MODE_ACCUMULATE) {
401                 janet_buffer_push_bytes(s->scratch, text, (int32_t)(result - text));
402             } else {
403                 uint32_t tag = rule[3];
404                 pushcap(s, janet_wrap_number(x), tag);
405             }
406             return result;
407         }
408 
409         case RULE_ACCUMULATE: {
410             uint32_t tag = rule[2];
411             int oldmode = s->mode;
412             if (!tag && oldmode == PEG_MODE_ACCUMULATE) {
413                 rule = s->bytecode + rule[1];
414                 goto tail;
415             }
416             CapState cs = cap_save(s);
417             s->mode = PEG_MODE_ACCUMULATE;
418             down1(s);
419             const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
420             up1(s);
421             s->mode = oldmode;
422             if (!result) return NULL;
423             Janet cap = janet_stringv(s->scratch->data + cs.scratch,
424                                       s->scratch->count - cs.scratch);
425             cap_load_keept(s, cs);
426             pushcap(s, cap, tag);
427             return result;
428         }
429 
430         case RULE_DROP: {
431             CapState cs = cap_save(s);
432             down1(s);
433             const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
434             up1(s);
435             if (!result) return NULL;
436             cap_load(s, cs);
437             return result;
438         }
439 
440         case RULE_GROUP: {
441             uint32_t tag = rule[2];
442             int oldmode = s->mode;
443             CapState cs = cap_save(s);
444             s->mode = PEG_MODE_NORMAL;
445             down1(s);
446             const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
447             up1(s);
448             s->mode = oldmode;
449             if (!result) return NULL;
450             int32_t num_sub_captures = s->captures->count - cs.cap;
451             JanetArray *sub_captures = janet_array(num_sub_captures);
452             safe_memcpy(sub_captures->data,
453                         s->captures->data + cs.cap,
454                         sizeof(Janet) * num_sub_captures);
455             sub_captures->count = num_sub_captures;
456             cap_load_keept(s, cs);
457             pushcap(s, janet_wrap_array(sub_captures), tag);
458             return result;
459         }
460 
461         case RULE_REPLACE:
462         case RULE_MATCHTIME: {
463             uint32_t tag = rule[3];
464             int oldmode = s->mode;
465             CapState cs = cap_save(s);
466             s->mode = PEG_MODE_NORMAL;
467             down1(s);
468             const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
469             up1(s);
470             s->mode = oldmode;
471             if (!result) return NULL;
472 
473             Janet cap = janet_wrap_nil();
474             Janet constant = s->constants[rule[2]];
475             switch (janet_type(constant)) {
476                 default:
477                     cap = constant;
478                     break;
479                 case JANET_STRUCT:
480                     if (s->captures->count) {
481                         cap = janet_struct_get(janet_unwrap_struct(constant),
482                                                s->captures->data[s->captures->count - 1]);
483                     }
484                     break;
485                 case JANET_TABLE:
486                     if (s->captures->count) {
487                         cap = janet_table_get(janet_unwrap_table(constant),
488                                               s->captures->data[s->captures->count - 1]);
489                     }
490                     break;
491                 case JANET_CFUNCTION:
492                     cap = janet_unwrap_cfunction(constant)(s->captures->count - cs.cap,
493                                                            s->captures->data + cs.cap);
494                     break;
495                 case JANET_FUNCTION:
496                     cap = janet_call(janet_unwrap_function(constant),
497                                      s->captures->count - cs.cap,
498                                      s->captures->data + cs.cap);
499                     break;
500             }
501             cap_load_keept(s, cs);
502             if (rule[0] == RULE_MATCHTIME && !janet_truthy(cap)) return NULL;
503             pushcap(s, cap, tag);
504             return result;
505         }
506 
507         case RULE_ERROR: {
508             int oldmode = s->mode;
509             s->mode = PEG_MODE_NORMAL;
510             int32_t old_cap = s->captures->count;
511             down1(s);
512             const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
513             up1(s);
514             s->mode = oldmode;
515             if (!result) return NULL;
516             if (s->captures->count > old_cap) {
517                 /* Throw last capture */
518                 janet_panicv(s->captures->data[s->captures->count - 1]);
519             } else {
520                 /* Throw generic error */
521                 int32_t start = (int32_t)(text - s->text_start);
522                 LineCol lc = get_linecol_from_position(s, start);
523                 janet_panicf("match error at line %d, column %d", lc.line, lc.col);
524             }
525             return NULL;
526         }
527 
528         case RULE_BACKMATCH: {
529             uint32_t search = rule[1];
530             for (int32_t i = s->tags->count - 1; i >= 0; i--) {
531                 if (s->tags->data[i] == search) {
532                     Janet capture = s->tagged_captures->data[i];
533                     if (!janet_checktype(capture, JANET_STRING))
534                         return NULL;
535                     const uint8_t *bytes = janet_unwrap_string(capture);
536                     int32_t len = janet_string_length(bytes);
537                     if (text + len > s->text_end)
538                         return NULL;
539                     return memcmp(text, bytes, len) ? NULL : text + len;
540                 }
541             }
542             return NULL;
543         }
544 
545         case RULE_LENPREFIX: {
546             int oldmode = s->mode;
547             s->mode = PEG_MODE_NORMAL;
548             const uint8_t *next_text;
549             CapState cs = cap_save(s);
550             down1(s);
551             next_text = peg_rule(s, s->bytecode + rule[1], text);
552             up1(s);
553             if (NULL == next_text) return NULL;
554             s->mode = oldmode;
555             int32_t num_sub_captures = s->captures->count - cs.cap;
556             Janet lencap;
557             if (num_sub_captures <= 0 ||
558                     (lencap = s->captures->data[cs.cap], !janet_checkint(lencap))) {
559                 cap_load(s, cs);
560                 return NULL;
561             }
562             int32_t nrep = janet_unwrap_integer(lencap);
563             /* drop captures from len pattern */
564             cap_load(s, cs);
565             for (int32_t i = 0; i < nrep; i++) {
566                 down1(s);
567                 next_text = peg_rule(s, s->bytecode + rule[2], next_text);
568                 up1(s);
569                 if (NULL == next_text) {
570                     cap_load(s, cs);
571                     return NULL;
572                 }
573             }
574             return next_text;
575         }
576 
577         case RULE_READINT: {
578             uint32_t tag = rule[2];
579             uint32_t signedness = rule[1] & 0x10;
580             uint32_t endianess = rule[1] & 0x20;
581             int width = (int)(rule[1] & 0xF);
582             if (text + width > s->text_end) return NULL;
583             uint64_t accum = 0;
584             if (endianess) {
585                 /* BE */
586                 for (int i = 0; i < width; i++) accum = (accum << 8) | text[i];
587             } else {
588                 /* LE */
589                 for (int i = width - 1; i >= 0; i--) accum = (accum << 8) | text[i];
590             }
591 
592             Janet capture_value;
593             /* We can only parse integeres of greater than 6 bytes reliable if int-types are enabled.
594              * Otherwise, we may lose precision, so 6 is the maximum size when int-types are disabled. */
595 #ifdef JANET_INT_TYPES
596             if (width > 6) {
597                 if (signedness) {
598                     capture_value = janet_wrap_s64(peg_convert_u64_s64(accum, width));
599                 } else {
600                     capture_value = janet_wrap_u64(accum);
601                 }
602             } else
603 #endif
604             {
605                 double double_value;
606                 if (signedness) {
607                     double_value = (double)(peg_convert_u64_s64(accum, width));
608                 } else {
609                     double_value = (double)accum;
610                 }
611                 capture_value = janet_wrap_number(double_value);
612             }
613 
614             pushcap(s, capture_value, tag);
615             return text + width;
616         }
617 
618         case RULE_UNREF: {
619             int32_t tcap = s->tags->count;
620             down1(s);
621             const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
622             up1(s);
623             if (!result) return NULL;
624             int32_t final_tcap = s->tags->count;
625             /* Truncate tagged captures to not include items of the given tag */
626             int32_t w = tcap;
627             /* If no tag is given, drop ALL tagged captures */
628             if (rule[2]) {
629                 for (int32_t i = tcap; i < final_tcap; i++) {
630                     if (s->tags->data[i] != (0xFF & rule[2])) {
631                         s->tags->data[w] = s->tags->data[i];
632                         s->tagged_captures->data[w] = s->tagged_captures->data[i];
633                         w++;
634                     }
635                 }
636             }
637             s->tags->count = w;
638             s->tagged_captures->count = w;
639             return result;
640         }
641 
642     }
643 }
644 
645 /*
646  * Compilation
647  */
648 
649 typedef struct {
650     JanetTable *grammar;
651     JanetTable *default_grammar;
652     JanetTable *tags;
653     Janet *constants;
654     uint32_t *bytecode;
655     Janet form;
656     int depth;
657     uint32_t nexttag;
658     int has_backref;
659 } Builder;
660 
661 /* Forward declaration to allow recursion */
662 static uint32_t peg_compile1(Builder *b, Janet peg);
663 
664 /*
665  * Errors
666  */
667 
builder_cleanup(Builder * b)668 static void builder_cleanup(Builder *b) {
669     janet_v_free(b->constants);
670     janet_v_free(b->bytecode);
671 }
672 
peg_panic(Builder * b,const char * msg)673 JANET_NO_RETURN static void peg_panic(Builder *b, const char *msg) {
674     builder_cleanup(b);
675     janet_panicf("grammar error in %p, %s", b->form, msg);
676 }
677 
678 #define peg_panicf(b,...) peg_panic((b), (const char *) janet_formatc(__VA_ARGS__))
679 
peg_fixarity(Builder * b,int32_t argc,int32_t arity)680 static void peg_fixarity(Builder *b, int32_t argc, int32_t arity) {
681     if (argc != arity) {
682         peg_panicf(b, "expected %d argument%s, got %d",
683                    arity,
684                    arity == 1 ? "" : "s",
685                    argc);
686     }
687 }
688 
peg_arity(Builder * b,int32_t arity,int32_t min,int32_t max)689 static void peg_arity(Builder *b, int32_t arity, int32_t min, int32_t max) {
690     if (min >= 0 && arity < min)
691         peg_panicf(b, "arity mismatch, expected at least %d, got %d", min, arity);
692     if (max >= 0 && arity > max)
693         peg_panicf(b, "arity mismatch, expected at most %d, got %d", max, arity);
694 }
695 
peg_getset(Builder * b,Janet x)696 static const uint8_t *peg_getset(Builder *b, Janet x) {
697     if (!janet_checktype(x, JANET_STRING))
698         peg_panic(b, "expected string for character set");
699     const uint8_t *str = janet_unwrap_string(x);
700     return str;
701 }
702 
peg_getrange(Builder * b,Janet x)703 static const uint8_t *peg_getrange(Builder *b, Janet x) {
704     if (!janet_checktype(x, JANET_STRING))
705         peg_panic(b, "expected string for character range");
706     const uint8_t *str = janet_unwrap_string(x);
707     if (janet_string_length(str) != 2)
708         peg_panicf(b, "expected string to have length 2, got %v", x);
709     if (str[1] < str[0])
710         peg_panicf(b, "range %v is empty", x);
711     return str;
712 }
713 
peg_getinteger(Builder * b,Janet x)714 static int32_t peg_getinteger(Builder *b, Janet x) {
715     if (!janet_checkint(x))
716         peg_panicf(b, "expected integer, got %v", x);
717     return janet_unwrap_integer(x);
718 }
719 
peg_getnat(Builder * b,Janet x)720 static int32_t peg_getnat(Builder *b, Janet x) {
721     int32_t i = peg_getinteger(b, x);
722     if (i < 0)
723         peg_panicf(b, "expected non-negative integer, got %v", x);
724     return i;
725 }
726 
727 /*
728  * Emission
729  */
730 
emit_constant(Builder * b,Janet c)731 static uint32_t emit_constant(Builder *b, Janet c) {
732     uint32_t cindex = (uint32_t) janet_v_count(b->constants);
733     janet_v_push(b->constants, c);
734     return cindex;
735 }
736 
emit_tag(Builder * b,Janet t)737 static uint32_t emit_tag(Builder *b, Janet t) {
738     if (!janet_checktype(t, JANET_KEYWORD))
739         peg_panicf(b, "expected keyword for capture tag, got %v", t);
740     Janet check = janet_table_get(b->tags, t);
741     if (janet_checktype(check, JANET_NIL)) {
742         uint32_t tag = b->nexttag++;
743         if (tag > 255) {
744             peg_panic(b, "too many tags - up to 255 tags are supported per peg");
745         }
746         Janet val = janet_wrap_number(tag);
747         janet_table_put(b->tags, t, val);
748         return tag;
749     } else {
750         return (uint32_t) janet_unwrap_number(check);
751     }
752 }
753 
754 /* Reserve space in bytecode for a rule. When a special emits a rule,
755  * it must place that rule immediately on the bytecode stack. This lets
756  * the compiler know where the rule is going to be before it is complete,
757  * allowing recursive rules. */
758 typedef struct {
759     Builder *builder;
760     uint32_t index;
761     int32_t size;
762 } Reserve;
763 
reserve(Builder * b,int32_t size)764 static Reserve reserve(Builder *b, int32_t size) {
765     Reserve r;
766     r.index = janet_v_count(b->bytecode);
767     r.builder = b;
768     r.size = size;
769     for (int32_t i = 0; i < size; i++)
770         janet_v_push(b->bytecode, 0);
771     return r;
772 }
773 
774 /* Emit a rule in the builder. Returns the index of the new rule */
emit_rule(Reserve r,int32_t op,int32_t n,const uint32_t * body)775 static void emit_rule(Reserve r, int32_t op, int32_t n, const uint32_t *body) {
776     janet_assert(r.size == n + 1, "bad reserve");
777     r.builder->bytecode[r.index] = op;
778     memcpy(r.builder->bytecode + r.index + 1, body, n * sizeof(uint32_t));
779 }
780 
781 /* For RULE_LITERAL */
emit_bytes(Builder * b,uint32_t op,int32_t len,const uint8_t * bytes)782 static void emit_bytes(Builder *b, uint32_t op, int32_t len, const uint8_t *bytes) {
783     uint32_t next_rule = janet_v_count(b->bytecode);
784     janet_v_push(b->bytecode, op);
785     janet_v_push(b->bytecode, len);
786     int32_t words = ((len + 3) >> 2);
787     for (int32_t i = 0; i < words; i++)
788         janet_v_push(b->bytecode, 0);
789     memcpy(b->bytecode + next_rule + 2, bytes, len);
790 }
791 
792 /* For fixed arity rules of arities 1, 2, and 3 */
emit_1(Reserve r,uint32_t op,uint32_t arg)793 static void emit_1(Reserve r, uint32_t op, uint32_t arg) {
794     emit_rule(r, op, 1, &arg);
795 }
emit_2(Reserve r,uint32_t op,uint32_t arg1,uint32_t arg2)796 static void emit_2(Reserve r, uint32_t op, uint32_t arg1, uint32_t arg2) {
797     uint32_t arr[2] = {arg1, arg2};
798     emit_rule(r, op, 2, arr);
799 }
emit_3(Reserve r,uint32_t op,uint32_t arg1,uint32_t arg2,uint32_t arg3)800 static void emit_3(Reserve r, uint32_t op, uint32_t arg1, uint32_t arg2, uint32_t arg3) {
801     uint32_t arr[3] = {arg1, arg2, arg3};
802     emit_rule(r, op, 3, arr);
803 }
804 
805 /*
806  * Specials
807  */
808 
bitmap_set(uint32_t * bitmap,uint8_t c)809 static void bitmap_set(uint32_t *bitmap, uint8_t c) {
810     bitmap[c >> 5] |= ((uint32_t)1) << (c & 0x1F);
811 }
812 
spec_range(Builder * b,int32_t argc,const Janet * argv)813 static void spec_range(Builder *b, int32_t argc, const Janet *argv) {
814     peg_arity(b, argc, 1, -1);
815     if (argc == 1) {
816         Reserve r = reserve(b, 2);
817         const uint8_t *str = peg_getrange(b, argv[0]);
818         uint32_t arg = str[0] | (str[1] << 16);
819         emit_1(r, RULE_RANGE, arg);
820     } else {
821         /* Compile as a set */
822         Reserve r = reserve(b, 9);
823         uint32_t bitmap[8] = {0};
824         for (int32_t i = 0; i < argc; i++) {
825             const uint8_t *str = peg_getrange(b, argv[i]);
826             for (uint32_t c = str[0]; c <= str[1]; c++)
827                 bitmap_set(bitmap, c);
828         }
829         emit_rule(r, RULE_SET, 8, bitmap);
830     }
831 }
832 
spec_set(Builder * b,int32_t argc,const Janet * argv)833 static void spec_set(Builder *b, int32_t argc, const Janet *argv) {
834     peg_fixarity(b, argc, 1);
835     Reserve r = reserve(b, 9);
836     const uint8_t *str = peg_getset(b, argv[0]);
837     uint32_t bitmap[8] = {0};
838     for (int32_t i = 0; i < janet_string_length(str); i++)
839         bitmap_set(bitmap, str[i]);
840     emit_rule(r, RULE_SET, 8, bitmap);
841 }
842 
spec_look(Builder * b,int32_t argc,const Janet * argv)843 static void spec_look(Builder *b, int32_t argc, const Janet *argv) {
844     peg_arity(b, argc, 1, 2);
845     Reserve r = reserve(b, 3);
846     int32_t rulearg = argc == 2 ? 1 : 0;
847     int32_t offset = argc == 2 ? peg_getinteger(b, argv[0]) : 0;
848     uint32_t subrule = peg_compile1(b, argv[rulearg]);
849     emit_2(r, RULE_LOOK, (uint32_t) offset, subrule);
850 }
851 
852 /* Rule of the form [len, rules...] */
spec_variadic(Builder * b,int32_t argc,const Janet * argv,uint32_t op)853 static void spec_variadic(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
854     uint32_t rule = janet_v_count(b->bytecode);
855     janet_v_push(b->bytecode, op);
856     janet_v_push(b->bytecode, argc);
857     for (int32_t i = 0; i < argc; i++)
858         janet_v_push(b->bytecode, 0);
859     for (int32_t i = 0; i < argc; i++) {
860         uint32_t rulei = peg_compile1(b, argv[i]);
861         b->bytecode[rule + 2 + i] = rulei;
862     }
863 }
864 
spec_choice(Builder * b,int32_t argc,const Janet * argv)865 static void spec_choice(Builder *b, int32_t argc, const Janet *argv) {
866     spec_variadic(b, argc, argv, RULE_CHOICE);
867 }
spec_sequence(Builder * b,int32_t argc,const Janet * argv)868 static void spec_sequence(Builder *b, int32_t argc, const Janet *argv) {
869     spec_variadic(b, argc, argv, RULE_SEQUENCE);
870 }
871 
872 /* For (if a b) and (if-not a b) */
spec_branch(Builder * b,int32_t argc,const Janet * argv,uint32_t rule)873 static void spec_branch(Builder *b, int32_t argc, const Janet *argv, uint32_t rule) {
874     peg_fixarity(b, argc, 2);
875     Reserve r = reserve(b, 3);
876     uint32_t rule_a = peg_compile1(b, argv[0]);
877     uint32_t rule_b = peg_compile1(b, argv[1]);
878     emit_2(r, rule, rule_a, rule_b);
879 }
880 
spec_if(Builder * b,int32_t argc,const Janet * argv)881 static void spec_if(Builder *b, int32_t argc, const Janet *argv) {
882     spec_branch(b, argc, argv, RULE_IF);
883 }
spec_ifnot(Builder * b,int32_t argc,const Janet * argv)884 static void spec_ifnot(Builder *b, int32_t argc, const Janet *argv) {
885     spec_branch(b, argc, argv, RULE_IFNOT);
886 }
spec_lenprefix(Builder * b,int32_t argc,const Janet * argv)887 static void spec_lenprefix(Builder *b, int32_t argc, const Janet *argv) {
888     spec_branch(b, argc, argv, RULE_LENPREFIX);
889 }
890 
spec_between(Builder * b,int32_t argc,const Janet * argv)891 static void spec_between(Builder *b, int32_t argc, const Janet *argv) {
892     peg_fixarity(b, argc, 3);
893     Reserve r = reserve(b, 4);
894     int32_t lo = peg_getnat(b, argv[0]);
895     int32_t hi = peg_getnat(b, argv[1]);
896     uint32_t subrule = peg_compile1(b, argv[2]);
897     emit_3(r, RULE_BETWEEN, lo, hi, subrule);
898 }
899 
spec_repeater(Builder * b,int32_t argc,const Janet * argv,int32_t min)900 static void spec_repeater(Builder *b, int32_t argc, const Janet *argv, int32_t min) {
901     peg_fixarity(b, argc, 1);
902     Reserve r = reserve(b, 4);
903     uint32_t subrule = peg_compile1(b, argv[0]);
904     emit_3(r, RULE_BETWEEN, min, UINT32_MAX, subrule);
905 }
906 
spec_some(Builder * b,int32_t argc,const Janet * argv)907 static void spec_some(Builder *b, int32_t argc, const Janet *argv) {
908     spec_repeater(b, argc, argv, 1);
909 }
spec_any(Builder * b,int32_t argc,const Janet * argv)910 static void spec_any(Builder *b, int32_t argc, const Janet *argv) {
911     spec_repeater(b, argc, argv, 0);
912 }
913 
spec_atleast(Builder * b,int32_t argc,const Janet * argv)914 static void spec_atleast(Builder *b, int32_t argc, const Janet *argv) {
915     peg_fixarity(b, argc, 2);
916     Reserve r = reserve(b, 4);
917     int32_t n = peg_getnat(b, argv[0]);
918     uint32_t subrule = peg_compile1(b, argv[1]);
919     emit_3(r, RULE_BETWEEN, n, UINT32_MAX, subrule);
920 }
921 
spec_atmost(Builder * b,int32_t argc,const Janet * argv)922 static void spec_atmost(Builder *b, int32_t argc, const Janet *argv) {
923     peg_fixarity(b, argc, 2);
924     Reserve r = reserve(b, 4);
925     int32_t n = peg_getnat(b, argv[0]);
926     uint32_t subrule = peg_compile1(b, argv[1]);
927     emit_3(r, RULE_BETWEEN, 0, n, subrule);
928 }
929 
spec_opt(Builder * b,int32_t argc,const Janet * argv)930 static void spec_opt(Builder *b, int32_t argc, const Janet *argv) {
931     peg_fixarity(b, argc, 1);
932     Reserve r = reserve(b, 4);
933     uint32_t subrule = peg_compile1(b, argv[0]);
934     emit_3(r, RULE_BETWEEN, 0, 1, subrule);
935 }
936 
spec_repeat(Builder * b,int32_t argc,const Janet * argv)937 static void spec_repeat(Builder *b, int32_t argc, const Janet *argv) {
938     peg_fixarity(b, argc, 2);
939     Reserve r = reserve(b, 4);
940     int32_t n = peg_getnat(b, argv[0]);
941     uint32_t subrule = peg_compile1(b, argv[1]);
942     emit_3(r, RULE_BETWEEN, n, n, subrule);
943 }
944 
945 /* Rule of the form [rule] */
spec_onerule(Builder * b,int32_t argc,const Janet * argv,uint32_t op)946 static void spec_onerule(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
947     peg_fixarity(b, argc, 1);
948     Reserve r = reserve(b, 2);
949     uint32_t rule = peg_compile1(b, argv[0]);
950     emit_1(r, op, rule);
951 }
952 
spec_not(Builder * b,int32_t argc,const Janet * argv)953 static void spec_not(Builder *b, int32_t argc, const Janet *argv) {
954     spec_onerule(b, argc, argv, RULE_NOT);
955 }
spec_error(Builder * b,int32_t argc,const Janet * argv)956 static void spec_error(Builder *b, int32_t argc, const Janet *argv) {
957     if (argc == 0) {
958         Reserve r = reserve(b, 2);
959         uint32_t rule = peg_compile1(b, janet_wrap_number(0));
960         emit_1(r, RULE_ERROR, rule);
961     } else {
962         spec_onerule(b, argc, argv, RULE_ERROR);
963     }
964 }
spec_to(Builder * b,int32_t argc,const Janet * argv)965 static void spec_to(Builder *b, int32_t argc, const Janet *argv) {
966     spec_onerule(b, argc, argv, RULE_TO);
967 }
spec_thru(Builder * b,int32_t argc,const Janet * argv)968 static void spec_thru(Builder *b, int32_t argc, const Janet *argv) {
969     spec_onerule(b, argc, argv, RULE_THRU);
970 }
spec_drop(Builder * b,int32_t argc,const Janet * argv)971 static void spec_drop(Builder *b, int32_t argc, const Janet *argv) {
972     spec_onerule(b, argc, argv, RULE_DROP);
973 }
974 
975 /* Rule of the form [rule, tag] */
spec_cap1(Builder * b,int32_t argc,const Janet * argv,uint32_t op)976 static void spec_cap1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
977     peg_arity(b, argc, 1, 2);
978     Reserve r = reserve(b, 3);
979     uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
980     uint32_t rule = peg_compile1(b, argv[0]);
981     emit_2(r, op, rule, tag);
982 }
983 
spec_capture(Builder * b,int32_t argc,const Janet * argv)984 static void spec_capture(Builder *b, int32_t argc, const Janet *argv) {
985     spec_cap1(b, argc, argv, RULE_CAPTURE);
986 }
spec_accumulate(Builder * b,int32_t argc,const Janet * argv)987 static void spec_accumulate(Builder *b, int32_t argc, const Janet *argv) {
988     spec_cap1(b, argc, argv, RULE_ACCUMULATE);
989 }
spec_group(Builder * b,int32_t argc,const Janet * argv)990 static void spec_group(Builder *b, int32_t argc, const Janet *argv) {
991     spec_cap1(b, argc, argv, RULE_GROUP);
992 }
spec_unref(Builder * b,int32_t argc,const Janet * argv)993 static void spec_unref(Builder *b, int32_t argc, const Janet *argv) {
994     spec_cap1(b, argc, argv, RULE_UNREF);
995 }
996 
spec_capture_number(Builder * b,int32_t argc,const Janet * argv)997 static void spec_capture_number(Builder *b, int32_t argc, const Janet *argv) {
998     peg_arity(b, argc, 1, 3);
999     Reserve r = reserve(b, 4);
1000     uint32_t base = 0;
1001     if (argc >= 2) {
1002         if (!janet_checktype(argv[1], JANET_NIL)) {
1003             if (!janet_checkint(argv[1])) goto error;
1004             base = (uint32_t) janet_unwrap_integer(argv[1]);
1005             if (base < 2 || base > 36) goto error;
1006         }
1007     }
1008     uint32_t tag = (argc == 3) ? emit_tag(b, argv[2]) : 0;
1009     uint32_t rule = peg_compile1(b, argv[0]);
1010     emit_3(r, RULE_CAPTURE_NUM, rule, base, tag);
1011     return;
1012 error:
1013     peg_panicf(b, "expected integer between 2 and 36, got %v", argv[2]);
1014 }
1015 
spec_reference(Builder * b,int32_t argc,const Janet * argv)1016 static void spec_reference(Builder *b, int32_t argc, const Janet *argv) {
1017     peg_arity(b, argc, 1, 2);
1018     Reserve r = reserve(b, 3);
1019     uint32_t search = emit_tag(b, argv[0]);
1020     uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
1021     b->has_backref = 1;
1022     emit_2(r, RULE_GETTAG, search, tag);
1023 }
1024 
spec_tag1(Builder * b,int32_t argc,const Janet * argv,uint32_t op)1025 static void spec_tag1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
1026     peg_arity(b, argc, 0, 1);
1027     Reserve r = reserve(b, 2);
1028     uint32_t tag = (argc) ? emit_tag(b, argv[0]) : 0;
1029     (void) argv;
1030     emit_1(r, op, tag);
1031 }
1032 
spec_position(Builder * b,int32_t argc,const Janet * argv)1033 static void spec_position(Builder *b, int32_t argc, const Janet *argv) {
1034     spec_tag1(b, argc, argv, RULE_POSITION);
1035 }
spec_line(Builder * b,int32_t argc,const Janet * argv)1036 static void spec_line(Builder *b, int32_t argc, const Janet *argv) {
1037     spec_tag1(b, argc, argv, RULE_LINE);
1038 }
spec_column(Builder * b,int32_t argc,const Janet * argv)1039 static void spec_column(Builder *b, int32_t argc, const Janet *argv) {
1040     spec_tag1(b, argc, argv, RULE_COLUMN);
1041 }
1042 
spec_backmatch(Builder * b,int32_t argc,const Janet * argv)1043 static void spec_backmatch(Builder *b, int32_t argc, const Janet *argv) {
1044     b->has_backref = 1;
1045     spec_tag1(b, argc, argv, RULE_BACKMATCH);
1046 }
1047 
spec_argument(Builder * b,int32_t argc,const Janet * argv)1048 static void spec_argument(Builder *b, int32_t argc, const Janet *argv) {
1049     peg_arity(b, argc, 1, 2);
1050     Reserve r = reserve(b, 3);
1051     uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
1052     int32_t index = peg_getnat(b, argv[0]);
1053     emit_2(r, RULE_ARGUMENT, index, tag);
1054 }
1055 
spec_constant(Builder * b,int32_t argc,const Janet * argv)1056 static void spec_constant(Builder *b, int32_t argc, const Janet *argv) {
1057     janet_arity(argc, 1, 2);
1058     Reserve r = reserve(b, 3);
1059     uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
1060     emit_2(r, RULE_CONSTANT, emit_constant(b, argv[0]), tag);
1061 }
1062 
spec_replace(Builder * b,int32_t argc,const Janet * argv)1063 static void spec_replace(Builder *b, int32_t argc, const Janet *argv) {
1064     peg_arity(b, argc, 2, 3);
1065     Reserve r = reserve(b, 4);
1066     uint32_t subrule = peg_compile1(b, argv[0]);
1067     uint32_t constant = emit_constant(b, argv[1]);
1068     uint32_t tag = (argc == 3) ? emit_tag(b, argv[2]) : 0;
1069     emit_3(r, RULE_REPLACE, subrule, constant, tag);
1070 }
1071 
spec_matchtime(Builder * b,int32_t argc,const Janet * argv)1072 static void spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
1073     peg_arity(b, argc, 2, 3);
1074     Reserve r = reserve(b, 4);
1075     uint32_t subrule = peg_compile1(b, argv[0]);
1076     Janet fun = argv[1];
1077     if (!janet_checktype(fun, JANET_FUNCTION) &&
1078             !janet_checktype(fun, JANET_CFUNCTION)) {
1079         peg_panicf(b, "expected function|cfunction, got %v", fun);
1080     }
1081     uint32_t tag = (argc == 3) ? emit_tag(b, argv[2]) : 0;
1082     uint32_t cindex = emit_constant(b, fun);
1083     emit_3(r, RULE_MATCHTIME, subrule, cindex, tag);
1084 }
1085 
1086 #ifdef JANET_INT_TYPES
1087 #define JANET_MAX_READINT_WIDTH 8
1088 #else
1089 #define JANET_MAX_READINT_WIDTH 6
1090 #endif
1091 
spec_readint(Builder * b,int32_t argc,const Janet * argv,uint32_t mask)1092 static void spec_readint(Builder *b, int32_t argc, const Janet *argv, uint32_t mask) {
1093     peg_arity(b, argc, 1, 2);
1094     Reserve r = reserve(b, 3);
1095     uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
1096     int32_t width = peg_getnat(b, argv[0]);
1097     if ((width < 0) || (width > JANET_MAX_READINT_WIDTH)) {
1098         peg_panicf(b, "width must be between 0 and %d, got %d", JANET_MAX_READINT_WIDTH, width);
1099     }
1100     emit_2(r, RULE_READINT, mask | ((uint32_t) width), tag);
1101 }
1102 
spec_uint_le(Builder * b,int32_t argc,const Janet * argv)1103 static void spec_uint_le(Builder *b, int32_t argc, const Janet *argv) {
1104     spec_readint(b, argc, argv, 0x0u);
1105 }
spec_int_le(Builder * b,int32_t argc,const Janet * argv)1106 static void spec_int_le(Builder *b, int32_t argc, const Janet *argv) {
1107     spec_readint(b, argc, argv, 0x10u);
1108 }
spec_uint_be(Builder * b,int32_t argc,const Janet * argv)1109 static void spec_uint_be(Builder *b, int32_t argc, const Janet *argv) {
1110     spec_readint(b, argc, argv, 0x20u);
1111 }
spec_int_be(Builder * b,int32_t argc,const Janet * argv)1112 static void spec_int_be(Builder *b, int32_t argc, const Janet *argv) {
1113     spec_readint(b, argc, argv, 0x30u);
1114 }
1115 
1116 /* Special compiler form */
1117 typedef void (*Special)(Builder *b, int32_t argc, const Janet *argv);
1118 typedef struct {
1119     const char *name;
1120     Special special;
1121 } SpecialPair;
1122 
1123 /* Keep in lexical order (vim :sort works well) */
1124 static const SpecialPair peg_specials[] = {
1125     {"!", spec_not},
1126     {"$", spec_position},
1127     {"%", spec_accumulate},
1128     {"*", spec_sequence},
1129     {"+", spec_choice},
1130     {"->", spec_reference},
1131     {"/", spec_replace},
1132     {"<-", spec_capture},
1133     {">", spec_look},
1134     {"?", spec_opt},
1135     {"accumulate", spec_accumulate},
1136     {"any", spec_any},
1137     {"argument", spec_argument},
1138     {"at-least", spec_atleast},
1139     {"at-most", spec_atmost},
1140     {"backmatch", spec_backmatch},
1141     {"backref", spec_reference},
1142     {"between", spec_between},
1143     {"capture", spec_capture},
1144     {"choice", spec_choice},
1145     {"cmt", spec_matchtime},
1146     {"column", spec_column},
1147     {"constant", spec_constant},
1148     {"drop", spec_drop},
1149     {"error", spec_error},
1150     {"group", spec_group},
1151     {"if", spec_if},
1152     {"if-not", spec_ifnot},
1153     {"int", spec_int_le},
1154     {"int-be", spec_int_be},
1155     {"lenprefix", spec_lenprefix},
1156     {"line", spec_line},
1157     {"look", spec_look},
1158     {"not", spec_not},
1159     {"number", spec_capture_number},
1160     {"opt", spec_opt},
1161     {"position", spec_position},
1162     {"quote", spec_capture},
1163     {"range", spec_range},
1164     {"repeat", spec_repeat},
1165     {"replace", spec_replace},
1166     {"sequence", spec_sequence},
1167     {"set", spec_set},
1168     {"some", spec_some},
1169     {"thru", spec_thru},
1170     {"to", spec_to},
1171     {"uint", spec_uint_le},
1172     {"uint-be", spec_uint_be},
1173     {"unref", spec_unref},
1174 };
1175 
1176 /* Compile a janet value into a rule and return the rule index. */
peg_compile1(Builder * b,Janet peg)1177 static uint32_t peg_compile1(Builder *b, Janet peg) {
1178 
1179     /* Keep track of the form being compiled for error purposes */
1180     Janet old_form = b->form;
1181     JanetTable *old_grammar = b->grammar;
1182     b->form = peg;
1183 
1184     /* Resolve keyword references */
1185     int i = JANET_RECURSION_GUARD;
1186     JanetTable *grammar = old_grammar;
1187     for (; i > 0 && janet_checktype(peg, JANET_KEYWORD); --i) {
1188         Janet nextPeg = janet_table_get_ex(grammar, peg, &grammar);
1189         if (!grammar || janet_checktype(nextPeg, JANET_NIL)) {
1190             nextPeg = (b->default_grammar == NULL)
1191                       ? janet_wrap_nil()
1192                       : janet_table_get(b->default_grammar, peg);
1193             if (janet_checktype(nextPeg, JANET_NIL)) {
1194                 peg_panic(b, "unknown rule");
1195             }
1196         }
1197         peg = nextPeg;
1198         b->form = peg;
1199         b->grammar = grammar;
1200     }
1201     if (i == 0)
1202         peg_panic(b, "reference chain too deep");
1203 
1204     /* Check cache - for tuples we check only the local cache, as
1205      * in a different grammar, the same tuple can compile to a different
1206      * rule - for example, (+ :a :b) depends on whatever :a and :b are bound to. */
1207     Janet check = janet_checktype(peg, JANET_TUPLE)
1208                   ? janet_table_rawget(grammar, peg)
1209                   : janet_table_get(grammar, peg);
1210     if (!janet_checktype(check, JANET_NIL)) {
1211         b->form = old_form;
1212         b->grammar = old_grammar;
1213         return (uint32_t) janet_unwrap_number(check);
1214     }
1215 
1216     /* Check depth */
1217     if (b->depth-- == 0)
1218         peg_panic(b, "peg grammar recursed too deeply");
1219 
1220     /* The final rule to return */
1221     uint32_t rule = janet_v_count(b->bytecode);
1222 
1223     /* Add to cache. Do not cache structs, as we don't yet know
1224      * what rule they will return! We can just as effectively cache
1225      * the structs main rule. */
1226     if (!janet_checktype(peg, JANET_STRUCT)) {
1227         JanetTable *which_grammar = grammar;
1228         /* If we are a primitive pattern, add to the global cache (root grammar table) */
1229         if (!janet_checktype(peg, JANET_TUPLE)) {
1230             while (which_grammar->proto)
1231                 which_grammar = which_grammar->proto;
1232         }
1233         janet_table_put(which_grammar, peg, janet_wrap_number(rule));
1234     }
1235 
1236     switch (janet_type(peg)) {
1237         default:
1238             peg_panic(b, "unexpected peg source");
1239             return 0;
1240         case JANET_NUMBER: {
1241             int32_t n = peg_getinteger(b, peg);
1242             Reserve r = reserve(b, 2);
1243             if (n < 0) {
1244                 emit_1(r, RULE_NOTNCHAR, -n);
1245             } else {
1246                 emit_1(r, RULE_NCHAR, n);
1247             }
1248             break;
1249         }
1250         case JANET_STRING: {
1251             const uint8_t *str = janet_unwrap_string(peg);
1252             int32_t len = janet_string_length(str);
1253             emit_bytes(b, RULE_LITERAL, len, str);
1254             break;
1255         }
1256         case JANET_TABLE: {
1257             /* Build grammar table */
1258             JanetTable *new_grammar = janet_table_clone(janet_unwrap_table(peg));
1259             new_grammar->proto = grammar;
1260             b->grammar = grammar = new_grammar;
1261             /* Run the main rule */
1262             Janet main_rule = janet_table_rawget(grammar, janet_ckeywordv("main"));
1263             if (janet_checktype(main_rule, JANET_NIL))
1264                 peg_panic(b, "grammar requires :main rule");
1265             rule = peg_compile1(b, main_rule);
1266             break;
1267         }
1268         case JANET_STRUCT: {
1269             /* Build grammar table */
1270             const JanetKV *st = janet_unwrap_struct(peg);
1271             JanetTable *new_grammar = janet_table(2 * janet_struct_capacity(st));
1272             for (int32_t i = 0; i < janet_struct_capacity(st); i++) {
1273                 if (janet_checktype(st[i].key, JANET_KEYWORD)) {
1274                     janet_table_put(new_grammar, st[i].key, st[i].value);
1275                 }
1276             }
1277             new_grammar->proto = grammar;
1278             b->grammar = grammar = new_grammar;
1279             /* Run the main rule */
1280             Janet main_rule = janet_table_rawget(grammar, janet_ckeywordv("main"));
1281             if (janet_checktype(main_rule, JANET_NIL))
1282                 peg_panic(b, "grammar requires :main rule");
1283             rule = peg_compile1(b, main_rule);
1284             break;
1285         }
1286         case JANET_TUPLE: {
1287             const Janet *tup = janet_unwrap_tuple(peg);
1288             int32_t len = janet_tuple_length(tup);
1289             if (len == 0) peg_panic(b, "tuple in grammar must have non-zero length");
1290             if (janet_checkint(tup[0])) {
1291                 int32_t n = janet_unwrap_integer(tup[0]);
1292                 if (n < 0) {
1293                     peg_panicf(b, "expected non-negative integer, got %d", n);
1294                 }
1295                 spec_repeat(b, len, tup);
1296                 break;
1297             }
1298             if (!janet_checktype(tup[0], JANET_SYMBOL))
1299                 peg_panicf(b, "expected grammar command, found %v", tup[0]);
1300             const uint8_t *sym = janet_unwrap_symbol(tup[0]);
1301             const SpecialPair *sp = janet_strbinsearch(
1302                                         &peg_specials,
1303                                         sizeof(peg_specials) / sizeof(SpecialPair),
1304                                         sizeof(SpecialPair),
1305                                         sym);
1306             if (sp) {
1307                 sp->special(b, len - 1, tup + 1);
1308             } else {
1309                 peg_panicf(b, "unknown special %S", sym);
1310             }
1311             break;
1312         }
1313     }
1314 
1315     /* Increase depth again */
1316     b->depth++;
1317     b->form = old_form;
1318     b->grammar = old_grammar;
1319     return rule;
1320 }
1321 
1322 /*
1323  * Post-Compilation
1324  */
1325 
peg_mark(void * p,size_t size)1326 static int peg_mark(void *p, size_t size) {
1327     (void) size;
1328     JanetPeg *peg = (JanetPeg *)p;
1329     if (NULL != peg->constants)
1330         for (uint32_t i = 0; i < peg->num_constants; i++)
1331             janet_mark(peg->constants[i]);
1332     return 0;
1333 }
1334 
peg_marshal(void * p,JanetMarshalContext * ctx)1335 static void peg_marshal(void *p, JanetMarshalContext *ctx) {
1336     JanetPeg *peg = (JanetPeg *)p;
1337     janet_marshal_size(ctx, peg->bytecode_len);
1338     janet_marshal_int(ctx, (int32_t)peg->num_constants);
1339     janet_marshal_abstract(ctx, p);
1340     for (size_t i = 0; i < peg->bytecode_len; i++)
1341         janet_marshal_int(ctx, (int32_t) peg->bytecode[i]);
1342     for (uint32_t j = 0; j < peg->num_constants; j++)
1343         janet_marshal_janet(ctx, peg->constants[j]);
1344 }
1345 
1346 /* Used to ensure that if we place several arrays in one memory chunk, each
1347  * array will be correctly aligned */
size_padded(size_t offset,size_t size)1348 static size_t size_padded(size_t offset, size_t size) {
1349     size_t x = size + offset - 1;
1350     return x - (x % size);
1351 }
1352 
peg_unmarshal(JanetMarshalContext * ctx)1353 static void *peg_unmarshal(JanetMarshalContext *ctx) {
1354     size_t bytecode_len = janet_unmarshal_size(ctx);
1355     uint32_t num_constants = (uint32_t) janet_unmarshal_int(ctx);
1356 
1357     /* Calculate offsets. Should match those in make_peg */
1358     size_t bytecode_start = size_padded(sizeof(JanetPeg), sizeof(uint32_t));
1359     size_t bytecode_size = bytecode_len * sizeof(uint32_t);
1360     size_t constants_start = size_padded(bytecode_start + bytecode_size, sizeof(Janet));
1361     size_t total_size = constants_start + sizeof(Janet) * (size_t) num_constants;
1362 
1363     /* DOS prevention? I.E. we could read bytecode and constants before
1364      * hand so we don't allocated a ton of memory on bad, short input */
1365 
1366     /* Allocate PEG */
1367     char *mem = janet_unmarshal_abstract(ctx, total_size);
1368     JanetPeg *peg = (JanetPeg *)mem;
1369     uint32_t *bytecode = (uint32_t *)(mem + bytecode_start);
1370     Janet *constants = (Janet *)(mem + constants_start);
1371     peg->bytecode = NULL;
1372     peg->constants = NULL;
1373     peg->bytecode_len = bytecode_len;
1374     peg->num_constants = num_constants;
1375 
1376     for (size_t i = 0; i < peg->bytecode_len; i++)
1377         bytecode[i] = (uint32_t) janet_unmarshal_int(ctx);
1378     for (uint32_t j = 0; j < peg->num_constants; j++)
1379         constants[j] = janet_unmarshal_janet(ctx);
1380 
1381     /* After here, no panics except for the bad: label. */
1382 
1383     /* Keep track at each index if an instruction was
1384      * reference (0x01) or is in a main bytecode position
1385      * (0x02). This lets us do a linear scan and not
1386      * need to a depth first traversal. It is stricter
1387      * than a dfs by not allowing certain kinds of unused
1388      * bytecode. */
1389     uint32_t blen = (int32_t) peg->bytecode_len;
1390     uint32_t clen = peg->num_constants;
1391     uint8_t *op_flags = janet_calloc(1, blen);
1392     if (NULL == op_flags) {
1393         JANET_OUT_OF_MEMORY;
1394     }
1395 
1396     /* verify peg bytecode */
1397     int32_t has_backref = 0;
1398     uint32_t i = 0;
1399     while (i < blen) {
1400         uint32_t instr = bytecode[i];
1401         uint32_t *rule = bytecode + i;
1402         op_flags[i] |= 0x02;
1403         switch (instr & 0x1F) {
1404             case RULE_LITERAL:
1405                 i += 2 + ((rule[1] + 3) >> 2);
1406                 break;
1407             case RULE_NCHAR:
1408             case RULE_NOTNCHAR:
1409             case RULE_RANGE:
1410             case RULE_POSITION:
1411             case RULE_LINE:
1412             case RULE_COLUMN:
1413                 /* [1 word] */
1414                 i += 2;
1415                 break;
1416             case RULE_BACKMATCH:
1417                 /* [1 word] */
1418                 i += 2;
1419                 has_backref = 1;
1420                 break;
1421             case RULE_SET:
1422                 /* [8 words] */
1423                 i += 9;
1424                 break;
1425             case RULE_LOOK:
1426                 /* [offset, rule] */
1427                 if (rule[2] >= blen) goto bad;
1428                 op_flags[rule[2]] |= 0x1;
1429                 i += 3;
1430                 break;
1431             case RULE_CHOICE:
1432             case RULE_SEQUENCE:
1433                 /* [len, rules...] */
1434             {
1435                 uint32_t len = rule[1];
1436                 for (uint32_t j = 0; j < len; j++) {
1437                     if (rule[2 + j] >= blen) goto bad;
1438                     op_flags[rule[2 + j]] |= 0x1;
1439                 }
1440                 i += 2 + len;
1441             }
1442             break;
1443             case RULE_IF:
1444             case RULE_IFNOT:
1445             case RULE_LENPREFIX:
1446                 /* [rule_a, rule_b (b if not a)] */
1447                 if (rule[1] >= blen) goto bad;
1448                 if (rule[2] >= blen) goto bad;
1449                 op_flags[rule[1]] |= 0x01;
1450                 op_flags[rule[2]] |= 0x01;
1451                 i += 3;
1452                 break;
1453             case RULE_BETWEEN:
1454                 /* [lo, hi, rule] */
1455                 if (rule[3] >= blen) goto bad;
1456                 op_flags[rule[3]] |= 0x01;
1457                 i += 4;
1458                 break;
1459             case RULE_ARGUMENT:
1460                 /* [searchtag, tag] */
1461                 i += 3;
1462                 break;
1463             case RULE_GETTAG:
1464                 /* [searchtag, tag] */
1465                 i += 3;
1466                 has_backref = 1;
1467                 break;
1468             case RULE_CONSTANT:
1469                 /* [constant, tag] */
1470                 if (rule[1] >= clen) goto bad;
1471                 i += 3;
1472                 break;
1473             case RULE_CAPTURE_NUM:
1474                 /* [rule, base, tag] */
1475                 if (rule[1] >= blen) goto bad;
1476                 op_flags[rule[1]] |= 0x01;
1477                 i += 4;
1478                 break;
1479             case RULE_ACCUMULATE:
1480             case RULE_GROUP:
1481             case RULE_CAPTURE:
1482             case RULE_UNREF:
1483                 /* [rule, tag] */
1484                 if (rule[1] >= blen) goto bad;
1485                 op_flags[rule[1]] |= 0x01;
1486                 i += 3;
1487                 break;
1488             case RULE_REPLACE:
1489             case RULE_MATCHTIME:
1490                 /* [rule, constant, tag] */
1491                 if (rule[1] >= blen) goto bad;
1492                 if (rule[2] >= clen) goto bad;
1493                 op_flags[rule[1]] |= 0x01;
1494                 i += 4;
1495                 break;
1496             case RULE_ERROR:
1497             case RULE_DROP:
1498             case RULE_NOT:
1499             case RULE_TO:
1500             case RULE_THRU:
1501                 /* [rule] */
1502                 if (rule[1] >= blen) goto bad;
1503                 op_flags[rule[1]] |= 0x01;
1504                 i += 2;
1505                 break;
1506             case RULE_READINT:
1507                 /* [ width | (endianess << 5) | (signedness << 6), tag ] */
1508                 if (rule[1] > JANET_MAX_READINT_WIDTH) goto bad;
1509                 i += 3;
1510                 break;
1511             default:
1512                 goto bad;
1513         }
1514     }
1515 
1516     /* last instruction cannot overflow */
1517     if (i != blen) goto bad;
1518 
1519     /* Make sure all referenced instructions are actually
1520      * in instruction positions. */
1521     for (i = 0; i < blen; i++)
1522         if (op_flags[i] == 0x01) goto bad;
1523 
1524     /* Good return */
1525     peg->bytecode = bytecode;
1526     peg->constants = constants;
1527     peg->has_backref = has_backref;
1528     janet_free(op_flags);
1529     return peg;
1530 
1531 bad:
1532     janet_free(op_flags);
1533     janet_panic("invalid peg bytecode");
1534 }
1535 
1536 static int cfun_peg_getter(JanetAbstract a, Janet key, Janet *out);
1537 static Janet peg_next(void *p, Janet key);
1538 
1539 const JanetAbstractType janet_peg_type = {
1540     "core/peg",
1541     NULL,
1542     peg_mark,
1543     cfun_peg_getter,
1544     NULL, /* put */
1545     peg_marshal,
1546     peg_unmarshal,
1547     NULL, /* tostring */
1548     NULL, /* compare */
1549     NULL, /* hash */
1550     peg_next,
1551     JANET_ATEND_NEXT
1552 };
1553 
1554 /* Convert Builder to JanetPeg (Janet Abstract Value) */
make_peg(Builder * b)1555 static JanetPeg *make_peg(Builder *b) {
1556     size_t bytecode_start = size_padded(sizeof(JanetPeg), sizeof(uint32_t));
1557     size_t bytecode_size = janet_v_count(b->bytecode) * sizeof(uint32_t);
1558     size_t constants_start = size_padded(bytecode_start + bytecode_size, sizeof(Janet));
1559     size_t constants_size = janet_v_count(b->constants) * sizeof(Janet);
1560     size_t total_size = constants_start + constants_size;
1561     char *mem = janet_abstract(&janet_peg_type, total_size);
1562     JanetPeg *peg = (JanetPeg *)mem;
1563     peg->bytecode = (uint32_t *)(mem + bytecode_start);
1564     peg->constants = (Janet *)(mem + constants_start);
1565     peg->num_constants = janet_v_count(b->constants);
1566     safe_memcpy(peg->bytecode, b->bytecode, bytecode_size);
1567     safe_memcpy(peg->constants, b->constants, constants_size);
1568     peg->bytecode_len = janet_v_count(b->bytecode);
1569     peg->has_backref = b->has_backref;
1570     return peg;
1571 }
1572 
1573 /* Compiler entry point */
compile_peg(Janet x)1574 static JanetPeg *compile_peg(Janet x) {
1575     Builder builder;
1576     builder.grammar = janet_table(0);
1577     builder.default_grammar = NULL;
1578     {
1579         Janet default_grammarv = janet_dyn("peg-grammar");
1580         if (janet_checktype(default_grammarv, JANET_TABLE)) {
1581             builder.default_grammar = janet_unwrap_table(default_grammarv);
1582         }
1583     }
1584     builder.tags = janet_table(0);
1585     builder.constants = NULL;
1586     builder.bytecode = NULL;
1587     builder.nexttag = 1;
1588     builder.form = x;
1589     builder.depth = JANET_RECURSION_GUARD;
1590     builder.has_backref = 0;
1591     peg_compile1(&builder, x);
1592     JanetPeg *peg = make_peg(&builder);
1593     builder_cleanup(&builder);
1594     return peg;
1595 }
1596 
1597 /*
1598  * C Functions
1599  */
1600 
1601 JANET_CORE_FN(cfun_peg_compile,
1602               "(peg/compile peg)",
1603               "Compiles a peg source data structure into a <core/peg>. This will speed up matching "
1604               "if the same peg will be used multiple times. Will also use `(dyn :peg-grammar)` to suppliment "
1605               "the grammar of the peg for otherwise undefined peg keywords.") {
1606     janet_fixarity(argc, 1);
1607     JanetPeg *peg = compile_peg(argv[0]);
1608     return janet_wrap_abstract(peg);
1609 }
1610 
1611 /* Common data for peg cfunctions */
1612 typedef struct {
1613     JanetPeg *peg;
1614     PegState s;
1615     JanetByteView bytes;
1616     JanetByteView repl;
1617     int32_t start;
1618 } PegCall;
1619 
1620 /* Initialize state for peg cfunctions */
peg_cfun_init(int32_t argc,Janet * argv,int get_replace)1621 static PegCall peg_cfun_init(int32_t argc, Janet *argv, int get_replace) {
1622     PegCall ret;
1623     int32_t min = get_replace ? 3 : 2;
1624     janet_arity(argc, get_replace, -1);
1625     if (janet_checktype(argv[0], JANET_ABSTRACT) &&
1626             janet_abstract_type(janet_unwrap_abstract(argv[0])) == &janet_peg_type) {
1627         ret.peg = janet_unwrap_abstract(argv[0]);
1628     } else {
1629         ret.peg = compile_peg(argv[0]);
1630     }
1631     if (get_replace) {
1632         ret.repl = janet_getbytes(argv, 1);
1633         ret.bytes = janet_getbytes(argv, 2);
1634     } else {
1635         ret.bytes = janet_getbytes(argv, 1);
1636     }
1637     if (argc > min) {
1638         ret.start = janet_gethalfrange(argv, min, ret.bytes.len, "offset");
1639         ret.s.extrac = argc - min - 1;
1640         ret.s.extrav = janet_tuple_n(argv + min + 1, argc - min - 1);
1641     } else {
1642         ret.start = 0;
1643         ret.s.extrac = 0;
1644         ret.s.extrav = NULL;
1645     }
1646     ret.s.mode = PEG_MODE_NORMAL;
1647     ret.s.text_start = ret.bytes.bytes;
1648     ret.s.text_end = ret.bytes.bytes + ret.bytes.len;
1649     ret.s.depth = JANET_RECURSION_GUARD;
1650     ret.s.captures = janet_array(0);
1651     ret.s.tagged_captures = janet_array(0);
1652     ret.s.scratch = janet_buffer(10);
1653     ret.s.tags = janet_buffer(10);
1654     ret.s.constants = ret.peg->constants;
1655     ret.s.bytecode = ret.peg->bytecode;
1656     ret.s.linemap = NULL;
1657     ret.s.linemaplen = -1;
1658     ret.s.has_backref = ret.peg->has_backref;
1659     return ret;
1660 }
1661 
peg_call_reset(PegCall * c)1662 static void peg_call_reset(PegCall *c) {
1663     c->s.captures->count = 0;
1664     c->s.scratch->count = 0;
1665     c->s.tags->count = 0;
1666 }
1667 
1668 JANET_CORE_FN(cfun_peg_match,
1669               "(peg/match peg text &opt start & args)",
1670               "Match a Parsing Expression Grammar to a byte string and return an array of captured values. "
1671               "Returns nil if text does not match the language defined by peg. The syntax of PEGs is documented on the Janet website.") {
1672     PegCall c = peg_cfun_init(argc, argv, 0);
1673     const uint8_t *result = peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + c.start);
1674     return result ? janet_wrap_array(c.s.captures) : janet_wrap_nil();
1675 }
1676 
1677 JANET_CORE_FN(cfun_peg_find,
1678               "(peg/find peg text &opt start & args)",
1679               "Find first index where the peg matches in text. Returns an integer, or nil if not found.") {
1680     PegCall c = peg_cfun_init(argc, argv, 0);
1681     for (int32_t i = c.start; i < c.bytes.len; i++) {
1682         peg_call_reset(&c);
1683         if (peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i))
1684             return janet_wrap_integer(i);
1685     }
1686     return janet_wrap_nil();
1687 }
1688 
1689 JANET_CORE_FN(cfun_peg_find_all,
1690               "(peg/find-all peg text &opt start & args)",
1691               "Find all indexes where the peg matches in text. Returns an array of integers.") {
1692     PegCall c = peg_cfun_init(argc, argv, 0);
1693     JanetArray *ret = janet_array(0);
1694     for (int32_t i = c.start; i < c.bytes.len; i++) {
1695         peg_call_reset(&c);
1696         if (peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i))
1697             janet_array_push(ret, janet_wrap_integer(i));
1698     }
1699     return janet_wrap_array(ret);
1700 }
1701 
cfun_peg_replace_generic(int32_t argc,Janet * argv,int only_one)1702 static Janet cfun_peg_replace_generic(int32_t argc, Janet *argv, int only_one) {
1703     PegCall c = peg_cfun_init(argc, argv, 1);
1704     JanetBuffer *ret = janet_buffer(0);
1705     int32_t trail = 0;
1706     for (int32_t i = c.start; i < c.bytes.len;) {
1707         peg_call_reset(&c);
1708         const uint8_t *result = peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i);
1709         if (NULL != result) {
1710             if (trail < i) {
1711                 janet_buffer_push_bytes(ret, c.bytes.bytes + trail, (i - trail));
1712                 trail = i;
1713             }
1714             int32_t nexti = (int32_t)(result - c.bytes.bytes);
1715             janet_buffer_push_bytes(ret, c.repl.bytes, c.repl.len);
1716             trail = nexti;
1717             if (nexti == i) nexti++;
1718             i = nexti;
1719             if (only_one) break;
1720         } else {
1721             i++;
1722         }
1723     }
1724     if (trail < c.bytes.len) {
1725         janet_buffer_push_bytes(ret, c.bytes.bytes + trail, (c.bytes.len - trail));
1726     }
1727     return janet_wrap_buffer(ret);
1728 }
1729 
1730 JANET_CORE_FN(cfun_peg_replace_all,
1731               "(peg/replace-all peg repl text &opt start & args)",
1732               "Replace all matches of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement.") {
1733     return cfun_peg_replace_generic(argc, argv, 0);
1734 }
1735 
1736 JANET_CORE_FN(cfun_peg_replace,
1737               "(peg/replace peg repl text &opt start & args)",
1738               "Replace first match of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement. "
1739               "If no matches are found, returns the input string in a new buffer.") {
1740     return cfun_peg_replace_generic(argc, argv, 1);
1741 }
1742 
1743 static JanetMethod peg_methods[] = {
1744     {"match", cfun_peg_match},
1745     {"find", cfun_peg_find},
1746     {"find-all", cfun_peg_find_all},
1747     {"replace", cfun_peg_replace},
1748     {"replace-all", cfun_peg_replace_all},
1749     {NULL, NULL}
1750 };
1751 
cfun_peg_getter(JanetAbstract a,Janet key,Janet * out)1752 static int cfun_peg_getter(JanetAbstract a, Janet key, Janet *out) {
1753     (void) a;
1754     if (!janet_checktype(key, JANET_KEYWORD))
1755         return 0;
1756     return janet_getmethod(janet_unwrap_keyword(key), peg_methods, out);
1757 }
1758 
peg_next(void * p,Janet key)1759 static Janet peg_next(void *p, Janet key) {
1760     (void) p;
1761     return janet_nextmethod(peg_methods, key);
1762 }
1763 
1764 /* Load the peg module */
janet_lib_peg(JanetTable * env)1765 void janet_lib_peg(JanetTable *env) {
1766     JanetRegExt cfuns[] = {
1767         JANET_CORE_REG("peg/compile", cfun_peg_compile),
1768         JANET_CORE_REG("peg/match", cfun_peg_match),
1769         JANET_CORE_REG("peg/find", cfun_peg_find),
1770         JANET_CORE_REG("peg/find-all", cfun_peg_find_all),
1771         JANET_CORE_REG("peg/replace", cfun_peg_replace),
1772         JANET_CORE_REG("peg/replace-all", cfun_peg_replace_all),
1773         JANET_REG_END
1774     };
1775     janet_core_cfuns_ext(env, NULL, cfuns);
1776     janet_register_abstract_type(&janet_peg_type);
1777 }
1778 
1779 #endif /* ifdef JANET_PEG */
1780