1 
2 /******************************************************************************
3 * MODULE     : packrat_parser.cpp
4 * DESCRIPTION: efficient packrat parsing
5 * COPYRIGHT  : (C) 2010  Joris van der Hoeven
6 *******************************************************************************
7 * This software falls under the GNU general public license version 3 or later.
8 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10 ******************************************************************************/
11 
12 #include "packrat_parser.hpp"
13 #include "analyze.hpp"
14 #include "drd_std.hpp"
15 #include "language.hpp" //(en|de)code_color
16 
17 extern tree the_et;
18 bool packrat_invalid_colors= false;
19 
20 /******************************************************************************
21 * Constructor
22 ******************************************************************************/
23 
packrat_parser_rep(packrat_grammar gr)24 packrat_parser_rep::packrat_parser_rep (packrat_grammar gr):
25   lan_name(gr->lan_name),
26   grammar (gr->grammar),
27   productions (gr->productions),
28   properties (gr->properties),
29   current_tree (packrat_uninit),
30   current_string (""),
31   current_start (-1),
32   current_end (-1),
33   current_path_pos (-1),
34   current_pos_path (-1),
35   current_cursor (-1),
36   current_input (),
37   current_cache (PACKRAT_UNDEFINED),
38   current_production (packrat_uninit) {}
39 
40 packrat_parser
make_packrat_parser(string lan,tree in)41 make_packrat_parser (string lan, tree in) {
42   static string         last_lan   = "";
43   static tree           last_in    = "";
44   static packrat_parser last_par;
45   if (lan != last_lan || in != last_in) {
46     packrat_grammar gr= find_packrat_grammar (lan);
47     last_lan   = lan;
48     last_in    = copy (in);
49     last_par   = packrat_parser (gr, in);
50   }
51   return last_par;
52 }
53 
54 packrat_parser
make_packrat_parser(string lan,tree in,path in_pos)55 make_packrat_parser (string lan, tree in, path in_pos) {
56   static string         last_lan   = "";
57   static tree           last_in    = "";
58   static path           last_in_pos= path ();
59   static packrat_parser last_par;
60   if (lan != last_lan || in != last_in || in_pos != last_in_pos) {
61     packrat_grammar gr= find_packrat_grammar (lan);
62     last_lan   = lan;
63     last_in    = copy (in);
64     last_in_pos= copy (last_in_pos);
65     last_par   = packrat_parser (gr, in, in_pos);
66   }
67   return last_par;
68 }
69 
70 /******************************************************************************
71 * Setting up the input
72 ******************************************************************************/
73 
74 void
set_input(tree t)75 packrat_parser_rep::set_input (tree t) {
76   current_string= "";
77   current_tree  = t;
78   serialize (t, path ());
79   if (DEBUG_FLATTEN)
80     debug_packrat << "Input " << current_string << "\n";
81   current_input= encode_tokens (current_string);
82 }
83 
84 void
set_cursor(path p)85 packrat_parser_rep::set_cursor (path p) {
86   if (is_nil (p)) current_cursor= -1;
87   else current_cursor= encode_tree_position (p);
88   //cout << current_input << ", " << current_cursor << "\n";
89 }
90 
91 /******************************************************************************
92 * Encoding and decoding of cursor positions in the input
93 ******************************************************************************/
94 
95 C
encode_string_position(int i)96 packrat_parser_rep::encode_string_position (int i) {
97   if (i < 0) return PACKRAT_FAILED;
98   int j=0;
99   C k=0;
100   while (j<i && j<N(current_string)) {
101     tm_char_forwards (current_string, j);
102     k++;
103   }
104   return k;
105 }
106 
107 int
encode_path(tree t,path p,path pos)108 packrat_parser_rep::encode_path (tree t, path p, path pos) {
109   //cout << "Search " << pos << " in " << t << ", " << p << "\n";
110   //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
111   if (is_nil (pos) || !current_start->contains (p)) return -1;
112   else if (is_atomic (t)) {
113     if (current_path_pos->contains (p * pos))
114       return current_path_pos[p * pos];
115     else if (pos->item < 0 || pos->item > N(t->label)) return -1;
116     return current_start[p] + pos->item;
117   }
118   else {
119     if (pos == path (0)) return current_start[p];
120     if (pos == path (1)) return current_end[p];
121     if (pos->item < 0 || pos->item > N(t) || is_nil (pos->next)) return -1;
122     return encode_path (t[pos->item], p * pos->item, pos->next);
123   }
124 }
125 
126 C
encode_tree_position(path p)127 packrat_parser_rep::encode_tree_position (path p) {
128   if (is_nil (p) || p->item < 0) return PACKRAT_FAILED;
129   int i= encode_path (current_tree, path (), p);
130   return encode_string_position (i);
131 }
132 
133 int
decode_string_position(C pos)134 packrat_parser_rep::decode_string_position (C pos) {
135   //cout << "Decode " << pos << "\n";
136   if (pos == PACKRAT_FAILED) return -1;
137   int i=0;
138   C k=0;
139   while (i<N(current_string) && k<pos) {
140     tm_char_forwards (current_string, i);
141     k++;
142   }
143   return i;
144 }
145 
146 path
decode_path(tree t,path p,int pos)147 packrat_parser_rep::decode_path (tree t, path p, int pos) {
148   //cout << "Search " << pos << " in " << t << ", " << p << "\n";
149   //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
150   if (is_atomic (t)) {
151     if (current_pos_path->contains (pos))
152       return current_pos_path[pos];
153     else return p * (pos - current_start[p]);
154   }
155   else {
156     for (int i=0; i<N(t); i++)
157       if (pos >= current_start[p*i] && pos <= current_end[p*i])
158 	return decode_path (t[i], p * i, pos);
159     if (pos <= current_start[p]) return p * 0;
160     if (pos >= current_end[p]) return p * 1;
161     return p * 0;
162   }
163 }
164 
165 path
decode_tree_position(C pos)166 packrat_parser_rep::decode_tree_position (C pos) {
167   int i= decode_string_position (pos);
168   if (i < 0) return path (i);
169   return decode_path (current_tree, path (), i);
170 }
171 
172 /******************************************************************************
173 * Packrat parsing
174 ******************************************************************************/
175 
176 bool
starts(tree t,string s)177 starts (tree t, string s) {
178   return is_atomic (t) && starts (t->label, s);
179 }
180 
181 C
parse(C sym,C pos)182 packrat_parser_rep::parse (C sym, C pos) {
183   D key= (((D) sym) << 32) + ((D) (sym^pos));
184   C im = current_cache [key];
185   if (im != PACKRAT_UNDEFINED) {
186     //cout << "Cached " << sym << " at " << pos << " -> " << im << LF;
187     return im;
188   }
189   current_cache (key)= PACKRAT_FAILED;
190   if (DEBUG_PACKRAT)
191     debug_packrat << "Parse " << packrat_decode[sym]
192                   << " at " << pos << INDENT << LF;
193   if (sym >= PACKRAT_TM_OPEN) {
194     array<C> inst= grammar [sym];
195     //cout << "Parse " << inst << " at " << pos << LF;
196     switch (inst[0]) {
197     case PACKRAT_OR:
198       im= PACKRAT_FAILED;
199       for (int i=1; i<N(inst); i++) {
200 	im= parse (inst[i], pos);
201 	if (im != PACKRAT_FAILED) break;
202       }
203       break;
204     case PACKRAT_CONCAT:
205       im= pos;
206       for (int i=1; i<N(inst); i++) {
207 	im= parse (inst[i], im);
208 	if (im == PACKRAT_FAILED) break;
209       }
210       break;
211     case PACKRAT_WHILE:
212       im= pos;
213       while (true) {
214 	C next= parse (inst[1], im);
215 	if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
216 	im= next;
217       }
218       break;
219     case PACKRAT_REPEAT:
220       im= parse (inst[1], pos);
221       if (im != PACKRAT_FAILED)
222 	while (true) {
223 	  C next= parse (inst[1], im);
224 	  if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
225 	  im= next;
226 	}
227       break;
228     case PACKRAT_RANGE:
229       if (pos < N (current_input) &&
230 	  current_input [pos] >= inst[1] &&
231 	  current_input [pos] <= inst[2])
232 	im= pos + 1;
233       else im= PACKRAT_FAILED;
234       break;
235     case PACKRAT_NOT:
236       if (parse (inst[1], pos) == PACKRAT_FAILED) im= pos;
237       else im= PACKRAT_FAILED;
238       break;
239     case PACKRAT_EXCEPT:
240       im= parse (inst[1], pos);
241       if (im != PACKRAT_FAILED)
242 	if (parse (inst[2], pos) != PACKRAT_FAILED)
243 	  im= PACKRAT_FAILED;
244       break;
245     case PACKRAT_TM_OPEN:
246       if (pos < N (current_input) &&
247 	  starts (packrat_decode[current_input[pos]], "<\\"))
248 	im= pos + 1;
249       else im= PACKRAT_FAILED;
250       break;
251     case PACKRAT_TM_ANY:
252       im= pos;
253       while (true) {
254 	C old= im;
255 	im= parse (PACKRAT_TM_OPEN, old);
256 	if (im == PACKRAT_FAILED)
257 	  im= parse (PACKRAT_TM_LEAF, old);
258 	else {
259 	  im= parse (PACKRAT_TM_ARGS, im);
260 	  if (im != PACKRAT_FAILED)
261 	    im= parse (encode_token ("</>"), im);
262 	}
263 	if (old == im) break;
264       }
265       break;
266     case PACKRAT_TM_ARGS:
267       im= parse (PACKRAT_TM_ANY, pos);
268       while (im < N (current_input))
269 	if (current_input[im] != encode_token ("<|>")) break;
270 	else im= parse (PACKRAT_TM_ANY, im + 1);
271       break;
272     case PACKRAT_TM_LEAF:
273       im= pos;
274       while (im < N (current_input)) {
275 	tree t= packrat_decode[current_input[im]];
276 	if (starts (t, "<\\") || t == "<|>" || t == "</>") break;
277 	else im++;
278       }
279       break;
280     case PACKRAT_TM_CHAR:
281       if (pos >= N (current_input)) im= PACKRAT_FAILED;
282       else {
283 	tree t= packrat_decode[current_input[pos]];
284 	if (starts (t, "<\\") || t == "<|>" || t == "</>") im= PACKRAT_FAILED;
285 	else im= pos + 1;
286       }
287       break;
288     case PACKRAT_TM_CURSOR:
289       if (pos == current_cursor) im= pos;
290       else im= PACKRAT_FAILED;
291       break;
292     case PACKRAT_TM_FAIL:
293       im= PACKRAT_FAILED;
294       break;
295     default:
296       im= parse (inst[0], pos);
297       break;
298     }
299   }
300   else {
301     if (pos < N (current_input) && current_input[pos] == sym) im= pos + 1;
302     else im= PACKRAT_FAILED;
303   }
304   current_cache (key)= im;
305   if (DEBUG_PACKRAT)
306     debug_packrat << UNINDENT << "Parsed " << packrat_decode[sym]
307                   << " at " << pos << " -> " << im << LF;
308   return im;
309 }
310 
311 /******************************************************************************
312 * Inspecting the parse tree
313 ******************************************************************************/
314 
315 void
inspect(C sym,C pos,array<C> & syms,array<C> & poss)316 packrat_parser_rep::inspect (C sym, C pos, array<C>& syms, array<C>& poss) {
317   syms= array<C> ();
318   poss= array<C> ();
319   C next= parse (sym, pos);
320   if (next == PACKRAT_FAILED) return;
321   if (sym >= PACKRAT_TM_OPEN) {
322     array<C> inst= grammar [sym];
323     //cout << "Parse " << inst << " at " << pos << LF;
324     switch (inst[0]) {
325     case PACKRAT_OR:
326       for (int i=1; i<N(inst); i++)
327 	if (parse (inst[i], pos) != PACKRAT_FAILED) {
328 	  inspect (inst[i], pos, syms, poss);
329 	  break;
330 	}
331       break;
332     case PACKRAT_CONCAT:
333       for (int i=1; i<N(inst); i++) {
334 	next= parse (inst[i], pos);
335 	if (next == PACKRAT_FAILED) break;
336         syms << inst[i];
337         poss << pos;
338 	pos= next;
339       }
340       break;
341     case PACKRAT_WHILE:
342     case PACKRAT_REPEAT:
343       while (true) {
344         C next= parse (inst[1], pos);
345         if (next == PACKRAT_FAILED) break;
346         syms << inst[1];
347         poss << pos;
348         pos= next;
349       }
350       break;
351     case PACKRAT_RANGE:
352     case PACKRAT_NOT:
353       break;
354     case PACKRAT_EXCEPT:
355       inspect (inst[1], pos, syms, poss);
356       break;
357     case PACKRAT_TM_OPEN:
358     case PACKRAT_TM_ANY:
359     case PACKRAT_TM_ARGS:
360     case PACKRAT_TM_LEAF:
361     case PACKRAT_TM_CHAR:
362     case PACKRAT_TM_CURSOR:
363     case PACKRAT_TM_FAIL:
364       break;
365     default:
366       inspect (inst[0], pos, syms, poss);
367       break;
368     }
369   }
370 }
371 
372 bool
is_left_recursive(C sym)373 packrat_parser_rep::is_left_recursive (C sym) {
374   if (sym < PACKRAT_TM_OPEN) return false;
375   array<C> inst= grammar [sym];
376   if (inst[0] != PACKRAT_CONCAT || N(inst) != 3) return false;
377   if (inst[1] < PACKRAT_TM_OPEN) return false;
378   tree t= packrat_decode[inst[1]];
379   return is_compound (t, "symbol", 1) && ends (t[0]->label, "-head");
380 }
381 
382 bool
is_associative(C sym)383 packrat_parser_rep::is_associative (C sym) {
384   static C prop= encode_symbol (compound ("property", "associativity"));
385   D key = (((D) prop) << 32) + ((D) (sym ^ prop));
386   if (!properties->contains (key)) return false;
387   return properties[key] == "associative";
388 }
389 
390 bool
is_anti_associative(C sym)391 packrat_parser_rep::is_anti_associative (C sym) {
392   static C prop= encode_symbol (compound ("property", "associativity"));
393   D key = (((D) prop) << 32) + ((D) (sym ^ prop));
394   if (!properties->contains (key)) return false;
395   return properties[key] == "anti-associative";
396 }
397 
398 bool
is_list_like(C sym)399 packrat_parser_rep::is_list_like (C sym) {
400   (void) sym;
401   return false;
402 }
403 
404 bool
is_selectable(C sym)405 packrat_parser_rep::is_selectable (C sym) {
406   tree t= packrat_decode[sym];
407   if (is_compound (t, "partial", 1)) return true;
408   if (!is_compound (t, "symbol", 1)) return false;
409   string s= t[0]->label;
410   return !ends (s, "-head") && !ends (s, "-tail");
411 }
412 
413 /******************************************************************************
414 * Finding all enclosing structures at a given position
415 ******************************************************************************/
416 
417 void
context(C sym,C pos,C w1,C w2,int mode,array<C> & kind,array<C> & begin,array<C> & end)418 packrat_parser_rep::context
419   (C sym, C pos, C w1, C w2, int mode,
420    array<C>& kind, array<C>& begin, array<C>& end)
421 {
422   C next= parse (sym, pos);
423   if (next < 0 || pos > w1 || next < w2) return;
424 
425   if (mode == 2 && (pos == w1 || next == w2)) {
426     static C prop= encode_symbol (compound ("property", "operator"));
427     D key = (((D) prop) << 32) + ((D) (sym ^ prop));
428     if (properties->contains (key)) return;
429   }
430 
431   if (true) {
432     static C sel_prop= encode_symbol (compound ("property", "selectable"));
433     static C foc_prop= encode_symbol (compound ("property", "focus"));
434     D sel_key = (((D) sel_prop) << 32) + ((D) (sym ^ sel_prop));
435     D foc_key = (((D) foc_prop) << 32) + ((D) (sym ^ foc_prop));
436     if (properties->contains (sel_key) &&
437         properties[sel_key] == "inside");
438     else if (properties->contains (foc_key) &&
439              properties[foc_key] == "disallow" &&
440              mode == 2);
441     else {
442       int n= N(kind);
443       if (n >= 1 && begin[n-1] == pos && end[n-1] == next) {
444         if (is_selectable (sym) || !is_selectable (kind[n-1]))
445           kind[n-1]= sym;
446       }
447       else {
448         kind  << sym;
449         begin << pos;
450         end   << next;
451       }
452     }
453   }
454 
455   if (mode >= 0) {
456     static C prop= encode_symbol (compound ("property", "atomic"));
457     D key = (((D) prop) << 32) + ((D) (sym ^ prop));
458     if (properties->contains (key)) return;
459   }
460 
461   if (is_left_recursive (sym) && mode == 0) {
462     array<C> inst= grammar [sym];
463     C before= pos;
464     C middle= parse (inst[1], before);
465     if (middle == PACKRAT_FAILED) return;
466     C after = parse (inst[2], middle);
467     if (after == PACKRAT_FAILED) return;
468     array<C> csym;
469     array<C> cpos;
470     inspect (inst[2], middle, csym, cpos);
471     csym= append (inst[1], csym);
472     cpos= append (before, cpos);
473     cpos << after;
474     int i1, i2;
475     for (i1=0; i1<N(csym); i1++)
476       if (cpos[i1+1] > w1) break;
477     for (i2=i1; i2<N(csym); i2++)
478       if (cpos[i2+1] >= w2) break;
479     if (i1 == i2) {
480       int i, n= N(kind);
481       context (csym[i1], cpos[i1], w1, w2, mode, kind, begin, end);
482       for (i=n; i<N(kind); i++)
483         if (is_selectable (kind[i]))
484           return;
485       kind  -> resize (n);
486       begin -> resize (n);
487       end   -> resize (n);
488     }
489     C alt_start= -1;
490     while (i1 > 0) {
491       array<C> ccsym;
492       array<C> ccpos;
493       inspect (csym[i1], cpos[i1], ccsym, ccpos);
494       if (N(ccsym)>1 && is_associative (ccsym[0])) {
495         if (w1 >= ccpos[1]) alt_start= ccpos[1];
496         break;
497       }
498       if (N(ccsym)>0 && is_anti_associative (ccsym[0])) break;
499       i1--;
500     }
501     tree sel= compound ("partial", packrat_decode[sym]);
502     kind  << encode_symbol (sel);
503     begin << (alt_start<0? cpos[i1]: alt_start);
504     end   << cpos[i2+1];
505     return;
506   }
507 
508   if (sym >= PACKRAT_TM_OPEN) {
509     array<C> inst= grammar [sym];
510     //cout << "Context " << inst << " at " << pos << LF;
511     switch (inst[0]) {
512     case PACKRAT_OR:
513       for (int i=1; i<N(inst); i++)
514 	if (parse (inst[i], pos) != PACKRAT_FAILED) {
515 	  context (inst[i], pos, w1, w2, mode, kind, begin, end);
516 	  break;
517 	}
518       break;
519     case PACKRAT_CONCAT:
520       for (int i=1; i<N(inst); i++) {
521 	next= parse (inst[i], pos);
522 	if (next == PACKRAT_FAILED) break;
523 	if (pos <= w1 && w2 <= next)
524 	  context (inst[i], pos, w1, w2, mode, kind, begin, end);
525 	if (next > w2) break;
526 	pos= next;
527       }
528       break;
529     case PACKRAT_WHILE:
530     case PACKRAT_REPEAT:
531       while (true) {
532 	C next= parse (inst[1], pos);
533 	if (next == PACKRAT_FAILED) break;
534 	if (pos <= w1 && w2 <= next)
535 	  context (inst[1], pos, w1, w2, mode, kind, begin, end);
536 	if (next > w2) break;
537 	pos= next;
538       }
539       break;
540     case PACKRAT_RANGE:
541     case PACKRAT_NOT:
542       break;
543     case PACKRAT_EXCEPT:
544       context (inst[1], pos, w1, w2, mode, kind, begin, end);
545       break;
546     case PACKRAT_TM_OPEN:
547     case PACKRAT_TM_ANY:
548     case PACKRAT_TM_ARGS:
549     case PACKRAT_TM_LEAF:
550     case PACKRAT_TM_CHAR:
551     case PACKRAT_TM_CURSOR:
552     case PACKRAT_TM_FAIL:
553       break;
554     default:
555       context (inst[0], pos, w1, w2, mode, kind, begin, end);
556       break;
557     }
558   }
559 }
560 
561 void
compress(array<C> & kind,array<C> & begin,array<C> & end)562 packrat_parser_rep::compress
563   (array<C>& kind, array<C>& begin, array<C>& end)
564 {
565   array<C> new_kind, new_begin, new_end;
566   for (int i=0; i<N(kind); i++) {
567     int n= N(new_kind);
568     if (is_selectable (kind[i]))
569       if (N(new_kind) == 0 ||
570 	  new_kind [n-1] != kind[i] ||
571 	  (new_begin[n-1] != begin[i] && new_end[n-1] != end[i])) {
572 	new_kind  << kind[i];
573 	new_begin << begin[i];
574 	new_end   << end[i];
575       }
576   }
577   kind = new_kind;
578   begin= new_begin;
579   end  = new_end;
580 }
581 
582 /******************************************************************************
583 * Syntax highlighting
584 ******************************************************************************/
585 
586 void
highlight(tree t,path tp,path p1,path p2,int col)587 packrat_parser_rep::highlight (tree t, path tp, path p1, path p2, int col) {
588   if (p1 == p2);
589   else if (is_atomic (t)) {
590     string s= t->label;
591     ASSERT (is_atom (p1) && is_atom (p2), "invalid selection");
592     ASSERT (0 <= p1->item && p1->item <= p2->item && p2->item <= N(s),
593 	    "invalid selection");
594     attach_highlight (t, current_hl_lan, col, p1->item, p2->item);
595   }
596   else if (N(t) == 0);
597   else {
598     ASSERT (!is_nil (p1) && !is_nil (p2) && p1->item <= p2->item,
599 	    "invalid selection");
600     if (p1 == path (0)) p1= path (0, 0);
601     if (p2 == path (1)) p2= path (N(t) - 1, right_index (t[N(t) -1]));
602     for (int i= max (0, p1->item); i <= min (p2->item, N(t)-1); i++) {
603       path q1= (i == p1->item? p1->next: path (0));
604       path q2= (i == p2->item? p2->next: path (right_index (t[i])));
605       highlight (t[i], tp * i, q1, q2, col);
606     }
607   }
608 }
609 
610 void
highlight(C sym,C pos)611 packrat_parser_rep::highlight (C sym, C pos) {
612   C next= parse (sym, pos);
613   if (next < 0) return;
614   if (sym >= PACKRAT_SYMBOLS) {
615     static C prop= encode_symbol (compound ("property", "highlight"));
616     D key = (((D) prop) << 32) + ((D) (sym ^ prop));
617     if (properties->contains (key)) {
618       int  col  = encode_color (properties [key]);
619       path start= decode_tree_position (pos);
620       path end  = decode_tree_position (next);
621       highlight (current_tree, path (), start, end, col);
622       static C prop= encode_symbol (compound ("property", "transparent"));
623       D key = (((D) prop) << 32) + ((D) (sym ^ prop));
624       if (!properties->contains (key)) return;
625     }
626   }
627 
628   if (sym >= PACKRAT_TM_OPEN) {
629     array<C> inst= grammar [sym];
630     //cout << "Parse " << inst << " at " << pos << LF;
631     switch (inst[0]) {
632     case PACKRAT_OR:
633       for (int i=1; i<N(inst); i++)
634 	if (parse (inst[i], pos) != PACKRAT_FAILED) {
635 	  highlight (inst[i], pos);
636 	  break;
637 	}
638       break;
639     case PACKRAT_CONCAT:
640       for (int i=1; i<N(inst); i++) {
641 	next= parse (inst[i], pos);
642 	highlight (inst[i], pos);
643 	pos= next;
644       }
645       break;
646     case PACKRAT_WHILE:
647     case PACKRAT_REPEAT:
648       while (true) {
649 	C next= parse (inst[1], pos);
650 	if (next == PACKRAT_FAILED) break;
651 	highlight (inst[1], pos);
652 	if (next == pos) break;
653 	pos= next;
654       }
655       break;
656     case PACKRAT_RANGE:
657     case PACKRAT_NOT:
658       break;
659     case PACKRAT_EXCEPT:
660       highlight (inst[1], pos);
661       break;
662     case PACKRAT_TM_OPEN:
663     case PACKRAT_TM_ANY:
664     case PACKRAT_TM_ARGS:
665     case PACKRAT_TM_LEAF:
666     case PACKRAT_TM_CHAR:
667     case PACKRAT_TM_CURSOR:
668     case PACKRAT_TM_FAIL:
669       break;
670     default:
671       highlight (inst[0], pos);
672       break;
673     }
674   }
675 }
676 
677 /******************************************************************************
678 * Memoized and accelerated highlighting
679 ******************************************************************************/
680 
681 static bool
empty_line(tree t)682 empty_line (tree t) {
683   if (!is_atomic (t)) return false;
684   string s= t->label;
685   for (int i=0; i<N(s); i++)
686     if (s[i] != ' ') return false;
687   return true;
688 }
689 
690 static bool
consistent_portion(tree t,int begin,int end)691 consistent_portion (tree t, int begin, int end) {
692   int level= 0;
693   for (int i=begin; i<end; i++)
694     if (is_atomic (t[i])) {
695       string s= t[i]->label;
696       for (int j=0; j<N(s); j++)
697 	switch (s[j]) {
698 	case '(': level++; break;
699 	case ')': if (level <= 0) return false; level--; break;
700 	case '[': level++; break;
701 	case ']': if (level <= 0) return false; level--; break;
702 	case '{': level++; break;
703 	case '}': if (level <= 0) return false; level--; break;
704 	default : break;
705 	}
706     }
707   return level == 0;
708 }
709 
710 static void
consistent_enlargement(tree t,int & begin,int & end)711 consistent_enlargement (tree t, int& begin, int& end) {
712   while (begin > 0 || end < N(t)) {
713     while (begin > 0    && !empty_line (t[begin-1])) begin--;
714     while (end   < N(t) && !empty_line (t[end    ])) end++;
715     if (consistent_portion (t, begin, end)) return;
716     //cout << "Inconsistent " << begin << " -- " << end << "\n";
717     begin= max (0   , begin - max (end - begin, 1));
718     end  = min (N(t), end   + max (end - begin, 1));
719     //cout << "  Try " << begin << " -- " << end << "\n";
720   }
721 }
722 
723 /******************************************************************************
724 * User interface
725 ******************************************************************************/
726 
727 path
packrat_parse(string lan,string sym,tree in)728 packrat_parse (string lan, string sym, tree in) {
729   packrat_parser par= make_packrat_parser (lan, in);
730   C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
731   return par->decode_tree_position (pos);
732 }
733 
734 bool
packrat_correct(string lan,string sym,tree in)735 packrat_correct (string lan, string sym, tree in) {
736   packrat_parser par= make_packrat_parser (lan, in);
737   C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
738   return pos == N(par->current_input);
739 }
740 
741 bool
packrat_available_path(string lan,tree in,path in_p)742 packrat_available_path (string lan, tree in, path in_p) {
743   packrat_parser par= make_packrat_parser (lan, in);
744   return par->current_start->contains (in_p);
745 }
746 
747 object
packrat_context(string lan,string s,tree in,path in_pos)748 packrat_context (string lan, string s, tree in, path in_pos) {
749   //cout << "Context " << in << " at " << in_pos
750   //     << " (" << lan << ", " << s << ")" << LF;
751   packrat_parser par= make_packrat_parser (lan, in);
752   C sym= encode_symbol (compound ("symbol", s));
753   if (par->parse (sym, 0) != N(par->current_input))
754     par= make_packrat_parser (lan, in, in_pos);
755   C pos= par->encode_tree_position (in_pos);
756   if (pos == PACKRAT_FAILED) return object (false);
757   array<C> kind, begin, end;
758   par->context (sym, 0, pos-1, pos+1, 0, kind, begin, end);
759   par->compress (kind, begin, end);
760   object ret= null_object ();
761   for (int i=0; i<N(kind); i++) {
762     object x1 (symbol_object (packrat_decode[kind[i]][0]->label));
763     object x2 (par->decode_tree_position (begin[i]));
764     object x3 (par->decode_tree_position (end[i]));
765     ret= cons (list_object (x1, x2, x3), ret);
766   }
767   return ret;
768 }
769 
770 bool
packrat_select(string lan,string s,tree in,path in_pos,path & p1,path & p2,int mode)771 packrat_select (string lan, string s, tree in, path in_pos,
772 		path& p1, path& p2, int mode)
773 {
774   // mode= 0: genuine semantic selection
775   // mode= 1: strictly larger selection for select_enlarge
776   // mode= 2: determine environment rectangles
777   if (path_less (p2, p1))
778     return packrat_select (lan, s, in, in_pos, p2, p1, mode);
779   //cout << "Enlarge " << p1 << " -- " << p2 << " in " << in
780   //<< " (" << lan << ", " << s << ")" << LF;
781   packrat_parser par= make_packrat_parser (lan, in);
782   C sym = encode_symbol (compound ("symbol", s));
783   if (par->parse (sym, 0) != N(par->current_input))
784     par= make_packrat_parser (lan, in, in_pos);
785   C pos1= par->encode_tree_position (p1);
786   C pos2= par->encode_tree_position (p2);
787   //cout << "Encoded " << pos1 << " -- " << pos2
788   //     << " in " << par->current_string << LF;
789   if (par->parse (sym, 0) != N(par->current_input)) return false;
790   if (pos1 == PACKRAT_FAILED || pos2 == PACKRAT_FAILED) return false;
791   array<C> kind, begin, end;
792   C pos0= pos1;
793   if ((mode == 1 && pos1 == pos2) || mode == 2) pos0= max (pos1 - 1, 0);
794   par->context (sym, 0, pos0, pos2, mode, kind, begin, end);
795   //for (int i=0; i<N(kind); i++)
796   //  cout << i << ":\t"
797   //       << par->decode_tree_position (begin[i]) << "\t"
798   //       << par->decode_tree_position (end[i]) << "\t"
799   //       << packrat_decode[kind[i]] << LF;
800   par->compress (kind, begin, end);
801   int n= N(kind);
802   if (n == 0) return false;
803   if (mode == 1) {
804     if (pos1 == begin[n-1] && pos2 == end[n-1]) n--;
805     if (n == 0) return false;
806   }
807   p1= par->decode_tree_position (begin[n-1]);
808   p2= par->decode_tree_position (end[n-1]);
809   //cout << "Selected " << packrat_decode[kind[n-1]] << LF;
810   return true;
811 }
812 
813 void
packrat_highlight_subtree(string lan,string s,tree in)814 packrat_highlight_subtree (string lan, string s, tree in) {
815   //cout << "Highlight " << lan << ", " << s << " in " << in << "\n";
816   int hl_lan= packrat_abbreviation (lan, s);
817   if (hl_lan == 0) return;
818   packrat_parser par= make_packrat_parser (lan, in);
819   C sym = encode_symbol (compound ("symbol", s));
820   if (par->parse (sym, 0) == N(par->current_input)) {
821     par->current_hl_lan= hl_lan;
822     par->highlight (sym, 0);
823   }
824 }
825 
826 void
packrat_highlight(string lan,string s,tree in)827 packrat_highlight (string lan, string s, tree in) {
828   int hl_lan= packrat_abbreviation (lan, s);
829   if (hl_lan == 0) return;
830   //cout << "Highlight " << in << "\n";
831   if (is_func (in, DOCUMENT)) {
832     int i, begin, end;
833     for (begin=0; begin<N(in); begin++)
834       if (!has_highlight (in[begin], hl_lan))
835 	break;
836     for (end=N(in)-1; end>begin; end--)
837       if (!has_highlight (in[end-1], hl_lan))
838 	break;
839     consistent_enlargement (in, begin, end);
840     for (i=begin; i<end; i++)
841       detach_highlight (in[i], hl_lan);
842     attach_highlight (in, hl_lan);
843     packrat_highlight_subtree (lan, s, in (begin, end));
844   }
845   else {
846     if (is_compound (in))
847       for (int i=0; i<N(in); i++)
848 	detach_highlight (in[i], hl_lan);
849     attach_highlight (in, hl_lan);
850     packrat_highlight_subtree (lan, s, in);
851   }
852 }
853