1
2 /******************************************************************************
3 * MODULE : packrat_parser.cpp
4 * DESCRIPTION: efficient packrat parsing
5 * COPYRIGHT : (C) 2010 Joris van der Hoeven
6 *******************************************************************************
7 * This software falls under the GNU general public license version 3 or later.
8 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10 ******************************************************************************/
11
12 #include "packrat_parser.hpp"
13 #include "analyze.hpp"
14 #include "drd_std.hpp"
15 #include "language.hpp" //(en|de)code_color
16
17 extern tree the_et;
18 bool packrat_invalid_colors= false;
19
20 /******************************************************************************
21 * Constructor
22 ******************************************************************************/
23
packrat_parser_rep(packrat_grammar gr)24 packrat_parser_rep::packrat_parser_rep (packrat_grammar gr):
25 lan_name(gr->lan_name),
26 grammar (gr->grammar),
27 productions (gr->productions),
28 properties (gr->properties),
29 current_tree (packrat_uninit),
30 current_string (""),
31 current_start (-1),
32 current_end (-1),
33 current_path_pos (-1),
34 current_pos_path (-1),
35 current_cursor (-1),
36 current_input (),
37 current_cache (PACKRAT_UNDEFINED),
38 current_production (packrat_uninit) {}
39
40 packrat_parser
make_packrat_parser(string lan,tree in)41 make_packrat_parser (string lan, tree in) {
42 static string last_lan = "";
43 static tree last_in = "";
44 static packrat_parser last_par;
45 if (lan != last_lan || in != last_in) {
46 packrat_grammar gr= find_packrat_grammar (lan);
47 last_lan = lan;
48 last_in = copy (in);
49 last_par = packrat_parser (gr, in);
50 }
51 return last_par;
52 }
53
54 packrat_parser
make_packrat_parser(string lan,tree in,path in_pos)55 make_packrat_parser (string lan, tree in, path in_pos) {
56 static string last_lan = "";
57 static tree last_in = "";
58 static path last_in_pos= path ();
59 static packrat_parser last_par;
60 if (lan != last_lan || in != last_in || in_pos != last_in_pos) {
61 packrat_grammar gr= find_packrat_grammar (lan);
62 last_lan = lan;
63 last_in = copy (in);
64 last_in_pos= copy (last_in_pos);
65 last_par = packrat_parser (gr, in, in_pos);
66 }
67 return last_par;
68 }
69
70 /******************************************************************************
71 * Setting up the input
72 ******************************************************************************/
73
74 void
set_input(tree t)75 packrat_parser_rep::set_input (tree t) {
76 current_string= "";
77 current_tree = t;
78 serialize (t, path ());
79 if (DEBUG_FLATTEN)
80 debug_packrat << "Input " << current_string << "\n";
81 current_input= encode_tokens (current_string);
82 }
83
84 void
set_cursor(path p)85 packrat_parser_rep::set_cursor (path p) {
86 if (is_nil (p)) current_cursor= -1;
87 else current_cursor= encode_tree_position (p);
88 //cout << current_input << ", " << current_cursor << "\n";
89 }
90
91 /******************************************************************************
92 * Encoding and decoding of cursor positions in the input
93 ******************************************************************************/
94
95 C
encode_string_position(int i)96 packrat_parser_rep::encode_string_position (int i) {
97 if (i < 0) return PACKRAT_FAILED;
98 int j=0;
99 C k=0;
100 while (j<i && j<N(current_string)) {
101 tm_char_forwards (current_string, j);
102 k++;
103 }
104 return k;
105 }
106
107 int
encode_path(tree t,path p,path pos)108 packrat_parser_rep::encode_path (tree t, path p, path pos) {
109 //cout << "Search " << pos << " in " << t << ", " << p << "\n";
110 //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
111 if (is_nil (pos) || !current_start->contains (p)) return -1;
112 else if (is_atomic (t)) {
113 if (current_path_pos->contains (p * pos))
114 return current_path_pos[p * pos];
115 else if (pos->item < 0 || pos->item > N(t->label)) return -1;
116 return current_start[p] + pos->item;
117 }
118 else {
119 if (pos == path (0)) return current_start[p];
120 if (pos == path (1)) return current_end[p];
121 if (pos->item < 0 || pos->item > N(t) || is_nil (pos->next)) return -1;
122 return encode_path (t[pos->item], p * pos->item, pos->next);
123 }
124 }
125
126 C
encode_tree_position(path p)127 packrat_parser_rep::encode_tree_position (path p) {
128 if (is_nil (p) || p->item < 0) return PACKRAT_FAILED;
129 int i= encode_path (current_tree, path (), p);
130 return encode_string_position (i);
131 }
132
133 int
decode_string_position(C pos)134 packrat_parser_rep::decode_string_position (C pos) {
135 //cout << "Decode " << pos << "\n";
136 if (pos == PACKRAT_FAILED) return -1;
137 int i=0;
138 C k=0;
139 while (i<N(current_string) && k<pos) {
140 tm_char_forwards (current_string, i);
141 k++;
142 }
143 return i;
144 }
145
146 path
decode_path(tree t,path p,int pos)147 packrat_parser_rep::decode_path (tree t, path p, int pos) {
148 //cout << "Search " << pos << " in " << t << ", " << p << "\n";
149 //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
150 if (is_atomic (t)) {
151 if (current_pos_path->contains (pos))
152 return current_pos_path[pos];
153 else return p * (pos - current_start[p]);
154 }
155 else {
156 for (int i=0; i<N(t); i++)
157 if (pos >= current_start[p*i] && pos <= current_end[p*i])
158 return decode_path (t[i], p * i, pos);
159 if (pos <= current_start[p]) return p * 0;
160 if (pos >= current_end[p]) return p * 1;
161 return p * 0;
162 }
163 }
164
165 path
decode_tree_position(C pos)166 packrat_parser_rep::decode_tree_position (C pos) {
167 int i= decode_string_position (pos);
168 if (i < 0) return path (i);
169 return decode_path (current_tree, path (), i);
170 }
171
172 /******************************************************************************
173 * Packrat parsing
174 ******************************************************************************/
175
176 bool
starts(tree t,string s)177 starts (tree t, string s) {
178 return is_atomic (t) && starts (t->label, s);
179 }
180
181 C
parse(C sym,C pos)182 packrat_parser_rep::parse (C sym, C pos) {
183 D key= (((D) sym) << 32) + ((D) (sym^pos));
184 C im = current_cache [key];
185 if (im != PACKRAT_UNDEFINED) {
186 //cout << "Cached " << sym << " at " << pos << " -> " << im << LF;
187 return im;
188 }
189 current_cache (key)= PACKRAT_FAILED;
190 if (DEBUG_PACKRAT)
191 debug_packrat << "Parse " << packrat_decode[sym]
192 << " at " << pos << INDENT << LF;
193 if (sym >= PACKRAT_TM_OPEN) {
194 array<C> inst= grammar [sym];
195 //cout << "Parse " << inst << " at " << pos << LF;
196 switch (inst[0]) {
197 case PACKRAT_OR:
198 im= PACKRAT_FAILED;
199 for (int i=1; i<N(inst); i++) {
200 im= parse (inst[i], pos);
201 if (im != PACKRAT_FAILED) break;
202 }
203 break;
204 case PACKRAT_CONCAT:
205 im= pos;
206 for (int i=1; i<N(inst); i++) {
207 im= parse (inst[i], im);
208 if (im == PACKRAT_FAILED) break;
209 }
210 break;
211 case PACKRAT_WHILE:
212 im= pos;
213 while (true) {
214 C next= parse (inst[1], im);
215 if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
216 im= next;
217 }
218 break;
219 case PACKRAT_REPEAT:
220 im= parse (inst[1], pos);
221 if (im != PACKRAT_FAILED)
222 while (true) {
223 C next= parse (inst[1], im);
224 if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
225 im= next;
226 }
227 break;
228 case PACKRAT_RANGE:
229 if (pos < N (current_input) &&
230 current_input [pos] >= inst[1] &&
231 current_input [pos] <= inst[2])
232 im= pos + 1;
233 else im= PACKRAT_FAILED;
234 break;
235 case PACKRAT_NOT:
236 if (parse (inst[1], pos) == PACKRAT_FAILED) im= pos;
237 else im= PACKRAT_FAILED;
238 break;
239 case PACKRAT_EXCEPT:
240 im= parse (inst[1], pos);
241 if (im != PACKRAT_FAILED)
242 if (parse (inst[2], pos) != PACKRAT_FAILED)
243 im= PACKRAT_FAILED;
244 break;
245 case PACKRAT_TM_OPEN:
246 if (pos < N (current_input) &&
247 starts (packrat_decode[current_input[pos]], "<\\"))
248 im= pos + 1;
249 else im= PACKRAT_FAILED;
250 break;
251 case PACKRAT_TM_ANY:
252 im= pos;
253 while (true) {
254 C old= im;
255 im= parse (PACKRAT_TM_OPEN, old);
256 if (im == PACKRAT_FAILED)
257 im= parse (PACKRAT_TM_LEAF, old);
258 else {
259 im= parse (PACKRAT_TM_ARGS, im);
260 if (im != PACKRAT_FAILED)
261 im= parse (encode_token ("</>"), im);
262 }
263 if (old == im) break;
264 }
265 break;
266 case PACKRAT_TM_ARGS:
267 im= parse (PACKRAT_TM_ANY, pos);
268 while (im < N (current_input))
269 if (current_input[im] != encode_token ("<|>")) break;
270 else im= parse (PACKRAT_TM_ANY, im + 1);
271 break;
272 case PACKRAT_TM_LEAF:
273 im= pos;
274 while (im < N (current_input)) {
275 tree t= packrat_decode[current_input[im]];
276 if (starts (t, "<\\") || t == "<|>" || t == "</>") break;
277 else im++;
278 }
279 break;
280 case PACKRAT_TM_CHAR:
281 if (pos >= N (current_input)) im= PACKRAT_FAILED;
282 else {
283 tree t= packrat_decode[current_input[pos]];
284 if (starts (t, "<\\") || t == "<|>" || t == "</>") im= PACKRAT_FAILED;
285 else im= pos + 1;
286 }
287 break;
288 case PACKRAT_TM_CURSOR:
289 if (pos == current_cursor) im= pos;
290 else im= PACKRAT_FAILED;
291 break;
292 case PACKRAT_TM_FAIL:
293 im= PACKRAT_FAILED;
294 break;
295 default:
296 im= parse (inst[0], pos);
297 break;
298 }
299 }
300 else {
301 if (pos < N (current_input) && current_input[pos] == sym) im= pos + 1;
302 else im= PACKRAT_FAILED;
303 }
304 current_cache (key)= im;
305 if (DEBUG_PACKRAT)
306 debug_packrat << UNINDENT << "Parsed " << packrat_decode[sym]
307 << " at " << pos << " -> " << im << LF;
308 return im;
309 }
310
311 /******************************************************************************
312 * Inspecting the parse tree
313 ******************************************************************************/
314
315 void
inspect(C sym,C pos,array<C> & syms,array<C> & poss)316 packrat_parser_rep::inspect (C sym, C pos, array<C>& syms, array<C>& poss) {
317 syms= array<C> ();
318 poss= array<C> ();
319 C next= parse (sym, pos);
320 if (next == PACKRAT_FAILED) return;
321 if (sym >= PACKRAT_TM_OPEN) {
322 array<C> inst= grammar [sym];
323 //cout << "Parse " << inst << " at " << pos << LF;
324 switch (inst[0]) {
325 case PACKRAT_OR:
326 for (int i=1; i<N(inst); i++)
327 if (parse (inst[i], pos) != PACKRAT_FAILED) {
328 inspect (inst[i], pos, syms, poss);
329 break;
330 }
331 break;
332 case PACKRAT_CONCAT:
333 for (int i=1; i<N(inst); i++) {
334 next= parse (inst[i], pos);
335 if (next == PACKRAT_FAILED) break;
336 syms << inst[i];
337 poss << pos;
338 pos= next;
339 }
340 break;
341 case PACKRAT_WHILE:
342 case PACKRAT_REPEAT:
343 while (true) {
344 C next= parse (inst[1], pos);
345 if (next == PACKRAT_FAILED) break;
346 syms << inst[1];
347 poss << pos;
348 pos= next;
349 }
350 break;
351 case PACKRAT_RANGE:
352 case PACKRAT_NOT:
353 break;
354 case PACKRAT_EXCEPT:
355 inspect (inst[1], pos, syms, poss);
356 break;
357 case PACKRAT_TM_OPEN:
358 case PACKRAT_TM_ANY:
359 case PACKRAT_TM_ARGS:
360 case PACKRAT_TM_LEAF:
361 case PACKRAT_TM_CHAR:
362 case PACKRAT_TM_CURSOR:
363 case PACKRAT_TM_FAIL:
364 break;
365 default:
366 inspect (inst[0], pos, syms, poss);
367 break;
368 }
369 }
370 }
371
372 bool
is_left_recursive(C sym)373 packrat_parser_rep::is_left_recursive (C sym) {
374 if (sym < PACKRAT_TM_OPEN) return false;
375 array<C> inst= grammar [sym];
376 if (inst[0] != PACKRAT_CONCAT || N(inst) != 3) return false;
377 if (inst[1] < PACKRAT_TM_OPEN) return false;
378 tree t= packrat_decode[inst[1]];
379 return is_compound (t, "symbol", 1) && ends (t[0]->label, "-head");
380 }
381
382 bool
is_associative(C sym)383 packrat_parser_rep::is_associative (C sym) {
384 static C prop= encode_symbol (compound ("property", "associativity"));
385 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
386 if (!properties->contains (key)) return false;
387 return properties[key] == "associative";
388 }
389
390 bool
is_anti_associative(C sym)391 packrat_parser_rep::is_anti_associative (C sym) {
392 static C prop= encode_symbol (compound ("property", "associativity"));
393 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
394 if (!properties->contains (key)) return false;
395 return properties[key] == "anti-associative";
396 }
397
398 bool
is_list_like(C sym)399 packrat_parser_rep::is_list_like (C sym) {
400 (void) sym;
401 return false;
402 }
403
404 bool
is_selectable(C sym)405 packrat_parser_rep::is_selectable (C sym) {
406 tree t= packrat_decode[sym];
407 if (is_compound (t, "partial", 1)) return true;
408 if (!is_compound (t, "symbol", 1)) return false;
409 string s= t[0]->label;
410 return !ends (s, "-head") && !ends (s, "-tail");
411 }
412
413 /******************************************************************************
414 * Finding all enclosing structures at a given position
415 ******************************************************************************/
416
417 void
context(C sym,C pos,C w1,C w2,int mode,array<C> & kind,array<C> & begin,array<C> & end)418 packrat_parser_rep::context
419 (C sym, C pos, C w1, C w2, int mode,
420 array<C>& kind, array<C>& begin, array<C>& end)
421 {
422 C next= parse (sym, pos);
423 if (next < 0 || pos > w1 || next < w2) return;
424
425 if (mode == 2 && (pos == w1 || next == w2)) {
426 static C prop= encode_symbol (compound ("property", "operator"));
427 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
428 if (properties->contains (key)) return;
429 }
430
431 if (true) {
432 static C sel_prop= encode_symbol (compound ("property", "selectable"));
433 static C foc_prop= encode_symbol (compound ("property", "focus"));
434 D sel_key = (((D) sel_prop) << 32) + ((D) (sym ^ sel_prop));
435 D foc_key = (((D) foc_prop) << 32) + ((D) (sym ^ foc_prop));
436 if (properties->contains (sel_key) &&
437 properties[sel_key] == "inside");
438 else if (properties->contains (foc_key) &&
439 properties[foc_key] == "disallow" &&
440 mode == 2);
441 else {
442 int n= N(kind);
443 if (n >= 1 && begin[n-1] == pos && end[n-1] == next) {
444 if (is_selectable (sym) || !is_selectable (kind[n-1]))
445 kind[n-1]= sym;
446 }
447 else {
448 kind << sym;
449 begin << pos;
450 end << next;
451 }
452 }
453 }
454
455 if (mode >= 0) {
456 static C prop= encode_symbol (compound ("property", "atomic"));
457 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
458 if (properties->contains (key)) return;
459 }
460
461 if (is_left_recursive (sym) && mode == 0) {
462 array<C> inst= grammar [sym];
463 C before= pos;
464 C middle= parse (inst[1], before);
465 if (middle == PACKRAT_FAILED) return;
466 C after = parse (inst[2], middle);
467 if (after == PACKRAT_FAILED) return;
468 array<C> csym;
469 array<C> cpos;
470 inspect (inst[2], middle, csym, cpos);
471 csym= append (inst[1], csym);
472 cpos= append (before, cpos);
473 cpos << after;
474 int i1, i2;
475 for (i1=0; i1<N(csym); i1++)
476 if (cpos[i1+1] > w1) break;
477 for (i2=i1; i2<N(csym); i2++)
478 if (cpos[i2+1] >= w2) break;
479 if (i1 == i2) {
480 int i, n= N(kind);
481 context (csym[i1], cpos[i1], w1, w2, mode, kind, begin, end);
482 for (i=n; i<N(kind); i++)
483 if (is_selectable (kind[i]))
484 return;
485 kind -> resize (n);
486 begin -> resize (n);
487 end -> resize (n);
488 }
489 C alt_start= -1;
490 while (i1 > 0) {
491 array<C> ccsym;
492 array<C> ccpos;
493 inspect (csym[i1], cpos[i1], ccsym, ccpos);
494 if (N(ccsym)>1 && is_associative (ccsym[0])) {
495 if (w1 >= ccpos[1]) alt_start= ccpos[1];
496 break;
497 }
498 if (N(ccsym)>0 && is_anti_associative (ccsym[0])) break;
499 i1--;
500 }
501 tree sel= compound ("partial", packrat_decode[sym]);
502 kind << encode_symbol (sel);
503 begin << (alt_start<0? cpos[i1]: alt_start);
504 end << cpos[i2+1];
505 return;
506 }
507
508 if (sym >= PACKRAT_TM_OPEN) {
509 array<C> inst= grammar [sym];
510 //cout << "Context " << inst << " at " << pos << LF;
511 switch (inst[0]) {
512 case PACKRAT_OR:
513 for (int i=1; i<N(inst); i++)
514 if (parse (inst[i], pos) != PACKRAT_FAILED) {
515 context (inst[i], pos, w1, w2, mode, kind, begin, end);
516 break;
517 }
518 break;
519 case PACKRAT_CONCAT:
520 for (int i=1; i<N(inst); i++) {
521 next= parse (inst[i], pos);
522 if (next == PACKRAT_FAILED) break;
523 if (pos <= w1 && w2 <= next)
524 context (inst[i], pos, w1, w2, mode, kind, begin, end);
525 if (next > w2) break;
526 pos= next;
527 }
528 break;
529 case PACKRAT_WHILE:
530 case PACKRAT_REPEAT:
531 while (true) {
532 C next= parse (inst[1], pos);
533 if (next == PACKRAT_FAILED) break;
534 if (pos <= w1 && w2 <= next)
535 context (inst[1], pos, w1, w2, mode, kind, begin, end);
536 if (next > w2) break;
537 pos= next;
538 }
539 break;
540 case PACKRAT_RANGE:
541 case PACKRAT_NOT:
542 break;
543 case PACKRAT_EXCEPT:
544 context (inst[1], pos, w1, w2, mode, kind, begin, end);
545 break;
546 case PACKRAT_TM_OPEN:
547 case PACKRAT_TM_ANY:
548 case PACKRAT_TM_ARGS:
549 case PACKRAT_TM_LEAF:
550 case PACKRAT_TM_CHAR:
551 case PACKRAT_TM_CURSOR:
552 case PACKRAT_TM_FAIL:
553 break;
554 default:
555 context (inst[0], pos, w1, w2, mode, kind, begin, end);
556 break;
557 }
558 }
559 }
560
561 void
compress(array<C> & kind,array<C> & begin,array<C> & end)562 packrat_parser_rep::compress
563 (array<C>& kind, array<C>& begin, array<C>& end)
564 {
565 array<C> new_kind, new_begin, new_end;
566 for (int i=0; i<N(kind); i++) {
567 int n= N(new_kind);
568 if (is_selectable (kind[i]))
569 if (N(new_kind) == 0 ||
570 new_kind [n-1] != kind[i] ||
571 (new_begin[n-1] != begin[i] && new_end[n-1] != end[i])) {
572 new_kind << kind[i];
573 new_begin << begin[i];
574 new_end << end[i];
575 }
576 }
577 kind = new_kind;
578 begin= new_begin;
579 end = new_end;
580 }
581
582 /******************************************************************************
583 * Syntax highlighting
584 ******************************************************************************/
585
586 void
highlight(tree t,path tp,path p1,path p2,int col)587 packrat_parser_rep::highlight (tree t, path tp, path p1, path p2, int col) {
588 if (p1 == p2);
589 else if (is_atomic (t)) {
590 string s= t->label;
591 ASSERT (is_atom (p1) && is_atom (p2), "invalid selection");
592 ASSERT (0 <= p1->item && p1->item <= p2->item && p2->item <= N(s),
593 "invalid selection");
594 attach_highlight (t, current_hl_lan, col, p1->item, p2->item);
595 }
596 else if (N(t) == 0);
597 else {
598 ASSERT (!is_nil (p1) && !is_nil (p2) && p1->item <= p2->item,
599 "invalid selection");
600 if (p1 == path (0)) p1= path (0, 0);
601 if (p2 == path (1)) p2= path (N(t) - 1, right_index (t[N(t) -1]));
602 for (int i= max (0, p1->item); i <= min (p2->item, N(t)-1); i++) {
603 path q1= (i == p1->item? p1->next: path (0));
604 path q2= (i == p2->item? p2->next: path (right_index (t[i])));
605 highlight (t[i], tp * i, q1, q2, col);
606 }
607 }
608 }
609
610 void
highlight(C sym,C pos)611 packrat_parser_rep::highlight (C sym, C pos) {
612 C next= parse (sym, pos);
613 if (next < 0) return;
614 if (sym >= PACKRAT_SYMBOLS) {
615 static C prop= encode_symbol (compound ("property", "highlight"));
616 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
617 if (properties->contains (key)) {
618 int col = encode_color (properties [key]);
619 path start= decode_tree_position (pos);
620 path end = decode_tree_position (next);
621 highlight (current_tree, path (), start, end, col);
622 static C prop= encode_symbol (compound ("property", "transparent"));
623 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
624 if (!properties->contains (key)) return;
625 }
626 }
627
628 if (sym >= PACKRAT_TM_OPEN) {
629 array<C> inst= grammar [sym];
630 //cout << "Parse " << inst << " at " << pos << LF;
631 switch (inst[0]) {
632 case PACKRAT_OR:
633 for (int i=1; i<N(inst); i++)
634 if (parse (inst[i], pos) != PACKRAT_FAILED) {
635 highlight (inst[i], pos);
636 break;
637 }
638 break;
639 case PACKRAT_CONCAT:
640 for (int i=1; i<N(inst); i++) {
641 next= parse (inst[i], pos);
642 highlight (inst[i], pos);
643 pos= next;
644 }
645 break;
646 case PACKRAT_WHILE:
647 case PACKRAT_REPEAT:
648 while (true) {
649 C next= parse (inst[1], pos);
650 if (next == PACKRAT_FAILED) break;
651 highlight (inst[1], pos);
652 if (next == pos) break;
653 pos= next;
654 }
655 break;
656 case PACKRAT_RANGE:
657 case PACKRAT_NOT:
658 break;
659 case PACKRAT_EXCEPT:
660 highlight (inst[1], pos);
661 break;
662 case PACKRAT_TM_OPEN:
663 case PACKRAT_TM_ANY:
664 case PACKRAT_TM_ARGS:
665 case PACKRAT_TM_LEAF:
666 case PACKRAT_TM_CHAR:
667 case PACKRAT_TM_CURSOR:
668 case PACKRAT_TM_FAIL:
669 break;
670 default:
671 highlight (inst[0], pos);
672 break;
673 }
674 }
675 }
676
677 /******************************************************************************
678 * Memoized and accelerated highlighting
679 ******************************************************************************/
680
681 static bool
empty_line(tree t)682 empty_line (tree t) {
683 if (!is_atomic (t)) return false;
684 string s= t->label;
685 for (int i=0; i<N(s); i++)
686 if (s[i] != ' ') return false;
687 return true;
688 }
689
690 static bool
consistent_portion(tree t,int begin,int end)691 consistent_portion (tree t, int begin, int end) {
692 int level= 0;
693 for (int i=begin; i<end; i++)
694 if (is_atomic (t[i])) {
695 string s= t[i]->label;
696 for (int j=0; j<N(s); j++)
697 switch (s[j]) {
698 case '(': level++; break;
699 case ')': if (level <= 0) return false; level--; break;
700 case '[': level++; break;
701 case ']': if (level <= 0) return false; level--; break;
702 case '{': level++; break;
703 case '}': if (level <= 0) return false; level--; break;
704 default : break;
705 }
706 }
707 return level == 0;
708 }
709
710 static void
consistent_enlargement(tree t,int & begin,int & end)711 consistent_enlargement (tree t, int& begin, int& end) {
712 while (begin > 0 || end < N(t)) {
713 while (begin > 0 && !empty_line (t[begin-1])) begin--;
714 while (end < N(t) && !empty_line (t[end ])) end++;
715 if (consistent_portion (t, begin, end)) return;
716 //cout << "Inconsistent " << begin << " -- " << end << "\n";
717 begin= max (0 , begin - max (end - begin, 1));
718 end = min (N(t), end + max (end - begin, 1));
719 //cout << " Try " << begin << " -- " << end << "\n";
720 }
721 }
722
723 /******************************************************************************
724 * User interface
725 ******************************************************************************/
726
727 path
packrat_parse(string lan,string sym,tree in)728 packrat_parse (string lan, string sym, tree in) {
729 packrat_parser par= make_packrat_parser (lan, in);
730 C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
731 return par->decode_tree_position (pos);
732 }
733
734 bool
packrat_correct(string lan,string sym,tree in)735 packrat_correct (string lan, string sym, tree in) {
736 packrat_parser par= make_packrat_parser (lan, in);
737 C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
738 return pos == N(par->current_input);
739 }
740
741 bool
packrat_available_path(string lan,tree in,path in_p)742 packrat_available_path (string lan, tree in, path in_p) {
743 packrat_parser par= make_packrat_parser (lan, in);
744 return par->current_start->contains (in_p);
745 }
746
747 object
packrat_context(string lan,string s,tree in,path in_pos)748 packrat_context (string lan, string s, tree in, path in_pos) {
749 //cout << "Context " << in << " at " << in_pos
750 // << " (" << lan << ", " << s << ")" << LF;
751 packrat_parser par= make_packrat_parser (lan, in);
752 C sym= encode_symbol (compound ("symbol", s));
753 if (par->parse (sym, 0) != N(par->current_input))
754 par= make_packrat_parser (lan, in, in_pos);
755 C pos= par->encode_tree_position (in_pos);
756 if (pos == PACKRAT_FAILED) return object (false);
757 array<C> kind, begin, end;
758 par->context (sym, 0, pos-1, pos+1, 0, kind, begin, end);
759 par->compress (kind, begin, end);
760 object ret= null_object ();
761 for (int i=0; i<N(kind); i++) {
762 object x1 (symbol_object (packrat_decode[kind[i]][0]->label));
763 object x2 (par->decode_tree_position (begin[i]));
764 object x3 (par->decode_tree_position (end[i]));
765 ret= cons (list_object (x1, x2, x3), ret);
766 }
767 return ret;
768 }
769
770 bool
packrat_select(string lan,string s,tree in,path in_pos,path & p1,path & p2,int mode)771 packrat_select (string lan, string s, tree in, path in_pos,
772 path& p1, path& p2, int mode)
773 {
774 // mode= 0: genuine semantic selection
775 // mode= 1: strictly larger selection for select_enlarge
776 // mode= 2: determine environment rectangles
777 if (path_less (p2, p1))
778 return packrat_select (lan, s, in, in_pos, p2, p1, mode);
779 //cout << "Enlarge " << p1 << " -- " << p2 << " in " << in
780 //<< " (" << lan << ", " << s << ")" << LF;
781 packrat_parser par= make_packrat_parser (lan, in);
782 C sym = encode_symbol (compound ("symbol", s));
783 if (par->parse (sym, 0) != N(par->current_input))
784 par= make_packrat_parser (lan, in, in_pos);
785 C pos1= par->encode_tree_position (p1);
786 C pos2= par->encode_tree_position (p2);
787 //cout << "Encoded " << pos1 << " -- " << pos2
788 // << " in " << par->current_string << LF;
789 if (par->parse (sym, 0) != N(par->current_input)) return false;
790 if (pos1 == PACKRAT_FAILED || pos2 == PACKRAT_FAILED) return false;
791 array<C> kind, begin, end;
792 C pos0= pos1;
793 if ((mode == 1 && pos1 == pos2) || mode == 2) pos0= max (pos1 - 1, 0);
794 par->context (sym, 0, pos0, pos2, mode, kind, begin, end);
795 //for (int i=0; i<N(kind); i++)
796 // cout << i << ":\t"
797 // << par->decode_tree_position (begin[i]) << "\t"
798 // << par->decode_tree_position (end[i]) << "\t"
799 // << packrat_decode[kind[i]] << LF;
800 par->compress (kind, begin, end);
801 int n= N(kind);
802 if (n == 0) return false;
803 if (mode == 1) {
804 if (pos1 == begin[n-1] && pos2 == end[n-1]) n--;
805 if (n == 0) return false;
806 }
807 p1= par->decode_tree_position (begin[n-1]);
808 p2= par->decode_tree_position (end[n-1]);
809 //cout << "Selected " << packrat_decode[kind[n-1]] << LF;
810 return true;
811 }
812
813 void
packrat_highlight_subtree(string lan,string s,tree in)814 packrat_highlight_subtree (string lan, string s, tree in) {
815 //cout << "Highlight " << lan << ", " << s << " in " << in << "\n";
816 int hl_lan= packrat_abbreviation (lan, s);
817 if (hl_lan == 0) return;
818 packrat_parser par= make_packrat_parser (lan, in);
819 C sym = encode_symbol (compound ("symbol", s));
820 if (par->parse (sym, 0) == N(par->current_input)) {
821 par->current_hl_lan= hl_lan;
822 par->highlight (sym, 0);
823 }
824 }
825
826 void
packrat_highlight(string lan,string s,tree in)827 packrat_highlight (string lan, string s, tree in) {
828 int hl_lan= packrat_abbreviation (lan, s);
829 if (hl_lan == 0) return;
830 //cout << "Highlight " << in << "\n";
831 if (is_func (in, DOCUMENT)) {
832 int i, begin, end;
833 for (begin=0; begin<N(in); begin++)
834 if (!has_highlight (in[begin], hl_lan))
835 break;
836 for (end=N(in)-1; end>begin; end--)
837 if (!has_highlight (in[end-1], hl_lan))
838 break;
839 consistent_enlargement (in, begin, end);
840 for (i=begin; i<end; i++)
841 detach_highlight (in[i], hl_lan);
842 attach_highlight (in, hl_lan);
843 packrat_highlight_subtree (lan, s, in (begin, end));
844 }
845 else {
846 if (is_compound (in))
847 for (int i=0; i<N(in); i++)
848 detach_highlight (in[i], hl_lan);
849 attach_highlight (in, hl_lan);
850 packrat_highlight_subtree (lan, s, in);
851 }
852 }
853