1 #include "automaton.h"
2 #include "internal.h"
3 
4 // the input automaton, walked for all escape sequences. an escape sequence is
5 // everything from an escape through recognized termination of that escape, or
6 // abort of the sequence via another escape, save the case of DCS sequences
7 // (those beginning with Escape-P), which are terminated by the ST sequence
8 // Escape-\. in the case of an aborted sequence, the sequence in its entirety
9 // is replayed as regular input. regular input is not driven through this
10 // automaton.
11 //
12 // one complication is that the user can just press escape themselves, followed
13 // by arbitrary other keypresses. when input is redirected from some source
14 // other than the connected terminal, this is no problem: we know control
15 // sequences to be coming in from the connected terminal, and everything else
16 // is bulk input.
17 
18 // we assumed escapes can only be composed of 7-bit chars
19 typedef struct esctrie {
20   // if non-NULL, this is the next level of radix-128 trie. it is NULL on
21   // accepting nodes, since no valid control sequence is a prefix of another
22   // valid control sequence. links are 1-biased (0 is NULL).
23   unsigned* trie;
24   enum {
25     NODE_SPECIAL,  // an accepting node, or pure transit (if ni.id == 0)
26     NODE_NUMERIC,  // accumulates a number
27     NODE_STRING,   // accumulates a string
28     NODE_FUNCTION, // invokes a function
29   } ntype;
30   ncinput ni;      // composed key terminating here
31   triefunc fxn;    // function to call on match
32   unsigned kleene; // idx of kleene match
33 } esctrie;
34 
35 // get node corresponding to 1-biased index
36 static inline esctrie*
esctrie_from_idx(const automaton * a,unsigned idx)37 esctrie_from_idx(const automaton* a, unsigned idx){
38   if(idx == 0){
39     return NULL;
40   }
41   return a->nodepool + (idx - 1);
42 }
43 
44 // return 1-biased index of node in pool
45 static inline unsigned
esctrie_idx(const automaton * a,const esctrie * e)46 esctrie_idx(const automaton* a, const esctrie* e){
47   return e - a->nodepool + 1;
48 }
49 
esctrie_id(const esctrie * e)50 uint32_t esctrie_id(const esctrie* e){
51   return e->ni.id;
52 }
53 
54 // returns the idx of the new node, or 0 on failure (idx is 1-biased).
55 // *invalidates any existing escnode pointers!*
56 static inline unsigned
create_esctrie_node(automaton * a,int special)57 create_esctrie_node(automaton* a, int special){
58   if(a->poolused == a->poolsize){
59     unsigned newsize = a->poolsize ? a->poolsize * 2 : 512;
60     esctrie* tmp = realloc(a->nodepool, sizeof(*a->nodepool) * newsize);
61     if(tmp == NULL){
62       return 0;
63     }
64     a->nodepool = tmp;
65     a->poolsize = newsize;
66   }
67   esctrie* e = &a->nodepool[a->poolused++];
68   memset(e, 0, sizeof(*e));
69   e->ntype = NODE_SPECIAL;
70   if((e->ni.id = special) == 0){
71     const size_t tsize = sizeof(*e->trie) * 0x80;
72     if((e->trie = malloc(tsize)) == NULL){
73       --a->poolused;
74       return 0;
75     }
76     memset(e->trie, 0, tsize);
77   }
78   return esctrie_idx(a, e);
79 }
80 
input_free_esctrie(automaton * a)81 void input_free_esctrie(automaton* a){
82   a->escapes = 0;
83   a->poolsize = 0;
84   for(unsigned i = 0 ; i < a->poolused ; ++i){
85     free(a->nodepool[i].trie);
86   }
87   free(a->nodepool);
88   a->poolused = 0;
89   a->nodepool = NULL;
90 }
91 
92 static int
esctrie_make_kleene(automaton * a,esctrie * e,unsigned follow,esctrie * term)93 esctrie_make_kleene(automaton* a, esctrie* e, unsigned follow, esctrie* term){
94   if(e->ntype != NODE_SPECIAL){
95     logerror("can't make node type %d string\n", e->ntype);
96     return -1;
97   }
98   for(unsigned i = 0 ; i < 0x80 ; ++i){
99     if(i == follow){
100       e->trie[i] = esctrie_idx(a, term);
101     }else if(e->trie[i] == 0){
102       e->trie[i] = esctrie_idx(a, e);
103     }
104   }
105   return 0;
106 }
107 
108 static int
esctrie_make_function(esctrie * e,triefunc fxn)109 esctrie_make_function(esctrie* e, triefunc fxn){
110   if(e->ntype != NODE_SPECIAL){
111     logerror("can't make node type %d function\n", e->ntype);
112     return -1;
113   }
114   if(e->trie){
115     logerror("can't make followed function\n");
116     return -1;
117   }
118   e->ntype = NODE_FUNCTION;
119   e->fxn = fxn;
120   return 0;
121 }
122 
123 static esctrie*
esctrie_make_string(automaton * a,esctrie * e,unsigned rxvtstyle)124 esctrie_make_string(automaton* a, esctrie* e, unsigned rxvtstyle){
125   if(e->ntype == NODE_STRING){
126     logerror("repeated string node\n");
127     return NULL;
128   }
129   if(e->ntype != NODE_SPECIAL){
130     logerror("can't make node type %d string\n", e->ntype);
131     return NULL;
132   }
133   for(int i = 0 ; i < 0x80 ; ++i){
134     if(!isprint(i)){
135       continue;
136     }
137     if(e->trie[i]){
138       logerror("can't make %c-followed string\n", i);
139       return NULL;
140     }
141   }
142   esctrie* newe = esctrie_from_idx(a, create_esctrie_node(a, 0));
143   if(newe == NULL){
144     return NULL;
145   }
146   for(int i = 0 ; i < 0x80 ; ++i){
147     if(!isprint(i)){
148       continue;
149     }
150     e->trie[i] = esctrie_idx(a, newe);
151   }
152   e = newe;
153   e->ntype = NODE_STRING;
154   for(int i = 0 ; i < 0x80 ; ++i){
155     if(!isprint(i)){
156       continue;
157     }
158     e->trie[i] = esctrie_idx(a, newe);
159   }
160   if(rxvtstyle){ // ends with bare ESC, not BEL/ST
161     if((e->trie[0x1b] = create_esctrie_node(a, 0)) == 0){
162       return NULL;
163     }
164     e = esctrie_from_idx(a, e->trie[0x1b]);
165     e->ni.id = 0;
166     e->ntype = NODE_SPECIAL;
167   }else{
168     if((e->trie[0x07] = create_esctrie_node(a, NCKEY_INVALID)) == 0){
169       return NULL;
170     }
171     esctrie* term = esctrie_from_idx(a, e->trie[0x07]);
172     if((e->trie[0x1b] = create_esctrie_node(a, 0)) == 0){
173       return NULL;
174     }
175     e = esctrie_from_idx(a, e->trie[0x1b]);
176     e->trie['\\'] = esctrie_idx(a, term);
177     term->ni.id = 0;
178     term->ntype = NODE_SPECIAL;
179     e = term;
180   }
181   logdebug("made string: %u\n", esctrie_idx(a, e));
182   return e;
183 }
184 
185 static esctrie*
link_kleene(automaton * a,esctrie * e,unsigned follow)186 link_kleene(automaton* a, esctrie* e, unsigned follow){
187   unsigned eidx = esctrie_idx(a, e);
188   if(e->kleene){
189     return a->nodepool + e->kleene;
190   }
191   // invalidates e
192   unsigned termidx = create_esctrie_node(a, 0);
193   unsigned targidx = create_esctrie_node(a, 0);
194   esctrie* term = esctrie_from_idx(a, termidx);
195   esctrie* targ = esctrie_from_idx(a, targidx);
196   if(targ == NULL){
197     return NULL;
198   }
199   if(term == NULL){
200     return NULL;
201   }
202   if(esctrie_make_kleene(a, targ, follow, term)){
203     return NULL;
204   }
205   e = esctrie_from_idx(a, eidx);
206   // fill in all NULL numeric links with the new target
207   for(unsigned int i = 0 ; i < 0x80 ; ++i){
208     if(i == follow){
209       if(e->trie[i]){
210         logerror("drain terminator already registered\n");
211         return NULL;
212       }
213       e->trie[follow] = esctrie_idx(a, term);
214     }else if(e->trie[i] == 0){
215       e->trie[i] = esctrie_idx(a, targ);
216     }
217   }
218   targ->kleene = esctrie_idx(a, targ);
219   return esctrie_from_idx(a, e->trie[follow]);
220 }
221 
222 // phase 1 of the numeric algorithm; find a φ node on e. not sure what
223 // to do if we have non-φ links at every digit...punt for now FIXME.
224 static inline unsigned
get_phi_node(automaton * a,esctrie * e)225 get_phi_node(automaton* a, esctrie* e){
226   // find a linked NODE_NUMERIC, if one exists. we'll want to reuse it.
227   int nonphis = 0;
228   esctrie* targ;
229   for(int i = '0' ; i <= '9' ; ++i){
230     if( (targ = esctrie_from_idx(a, e->trie[i])) ){
231       if(targ->ntype == NODE_NUMERIC){
232         logtrace("found existing phi node %u[%c]->%u\n", esctrie_idx(a, e), i, esctrie_idx(a, targ));
233         break;
234       }else{
235         ++nonphis;
236         targ = NULL;
237       }
238     }
239   }
240   // we either have a numeric target, or will make one now. if we create a new
241   // one, be sure to mark it numeric, and add all digit links back to itself.
242   if(targ == NULL){
243     if(nonphis == 10){
244       logerror("ten non-phi links from %u\n", esctrie_idx(a, e));
245       return 0;
246     }
247     if((targ = esctrie_from_idx(a, create_esctrie_node(a, 0))) == 0){
248       return 0;
249     }
250     targ->ntype = NODE_NUMERIC;
251     for(int i = '0' ; i <= '9' ; ++i){
252       targ->trie[i] = esctrie_idx(a, targ);
253     }
254   }
255   assert(NODE_NUMERIC == targ->ntype);
256   return esctrie_idx(a, targ);
257 }
258 
259 // phase 2 of the numeric algorithm; find a ή node for |successor| on |phi|.
260 static inline unsigned
get_eta_node(automaton * a,esctrie * phi,unsigned successor)261 get_eta_node(automaton* a, esctrie* phi, unsigned successor){
262   unsigned phiidx = esctrie_idx(a, phi);
263   unsigned etaidx = phi->trie[successor];
264   esctrie* eta = esctrie_from_idx(a, etaidx);
265   if(eta == NULL){
266     // invalidates phi
267     if((eta = esctrie_from_idx(a, create_esctrie_node(a, 0))) == NULL){
268       return 0;
269     }
270     phi = esctrie_from_idx(a, phiidx);
271     phi->trie[successor] = esctrie_idx(a, eta);
272   }
273   return esctrie_idx(a, eta);
274 }
275 
276 // |e| is a known-standard node reached by our prefix; go ahead and prep both
277 // phi and eta links from it.
278 static inline void
add_phi_and_eta_chain(const automaton * a,esctrie * e,unsigned phi,unsigned follow,unsigned eta)279 add_phi_and_eta_chain(const automaton *a, esctrie* e, unsigned phi,
280                       unsigned follow, unsigned eta){
281 //logtrace("working with %u phi: %u follow: %u eta: %u\n", esctrie_idx(a, e), phi, follow, eta);
282   for(int i = '0' ; i <= '9' ; ++i){
283     esctrie* chain = esctrie_from_idx(a, e->trie[i]);
284     if(chain == NULL){
285       //logdebug("linking %u[%d] to %u\n", esctrie_idx(a, e), i, phi);
286       e->trie[i] = phi;
287     }else if(chain->ntype == NODE_SPECIAL){
288 //logdebug("propagating along %u[%c]\n", e->trie[i], i);
289       add_phi_and_eta_chain(a, esctrie_from_idx(a, e->trie[i]), phi, follow, eta);
290     }
291   }
292   if(e->trie[follow] == 0){
293     //logdebug("linking %u[%u] to %u\n", esctrie_idx(a, e), follow, eta);
294     e->trie[follow] = eta;
295   }
296 }
297 
298 // phase 3 of the numeric algorithm: walk the automaton, finding all nodes
299 // which are prefixes of phi (all nodes matching the prefix, and all numeric
300 // non-phi chains from those nodes) and linking them to phi, and finding all
301 // nodes which are prefixes of eta (all numeric non-phi chains from the
302 // previous set) and linking them to eta. |e| is the path thus far.
303 static inline void
add_phi_and_eta_recurse(automaton * a,esctrie * e,const char * prefix,int pfxlen,esctrie * phi,unsigned follow,esctrie * eta,unsigned inphi)304 add_phi_and_eta_recurse(automaton* a, esctrie* e, const char* prefix,
305                         int pfxlen, esctrie* phi, unsigned follow,
306                         esctrie* eta, unsigned inphi){
307 //logtrace("working with %u %d prefix [%*.*s]\n", esctrie_idx(a, e), pfxlen, pfxlen, pfxlen, prefix);
308   // if pfxlen == 0, we found a match for our fixed prefix. start adding phi
309   // links whereever we can. where we find chained numerics, add an eta link.
310   if(pfxlen == 0){
311     add_phi_and_eta_chain(a, e, esctrie_idx(a, phi), follow, esctrie_idx(a, eta));
312     return;
313   }
314   // when we hit a \N in the prefix, we must recurse along all digit links
315   if(*prefix == '\\'){
316     ++prefix;
317     --pfxlen;
318     if(*prefix != 'N'){
319       logerror("illegal wildcard in prefix %c\n", *prefix);
320       return;
321     }
322     ++prefix;
323     --pfxlen;
324     for(int i = '0' ; i <= '9' ; ++i){
325       if(e->trie[i] == 0){
326         //logdebug("linking %u[%d] to %u\n", esctrie_idx(a, e), i, esctrie_idx(a, phi));
327         e->trie[i] = esctrie_idx(a, phi);
328       }else{
329         add_phi_and_eta_recurse(a, esctrie_from_idx(a, e->trie[i]),
330                                 prefix, pfxlen, phi, follow, eta, 1);
331       }
332     }
333   }else{
334     if(inphi){
335       for(int i = '0' ; i <= '9' ; ++i){
336         if(e->trie[i] == 0){
337           //logdebug("linking %u[%d] to %u\n", esctrie_idx(a, e), i, esctrie_idx(a, phi));
338           e->trie[i] = esctrie_idx(a, phi);
339         }else if(e->trie[i] != esctrie_idx(a, e)){
340           add_phi_and_eta_recurse(a, esctrie_from_idx(a, e->trie[i]),
341                                   prefix, pfxlen, phi, follow, eta, 1);
342         }
343       }
344     }
345     unsigned char p = *prefix;
346     if(e->trie[p]){
347       add_phi_and_eta_recurse(a, esctrie_from_idx(a, e->trie[p]),
348                               prefix + 1, pfxlen - 1, phi, follow, eta, 0);
349     }
350   }
351 }
352 
353 // |prefix| does *not* lead with an escape, and does not include the numeric.
354 static inline void
add_phi_and_eta(automaton * a,const char * prefix,size_t pfxlen,esctrie * phi,unsigned follow,esctrie * eta)355 add_phi_and_eta(automaton* a, const char* prefix, size_t pfxlen,
356                 esctrie* phi, unsigned follow, esctrie* eta){
357   esctrie* esc = esctrie_from_idx(a, a->escapes);
358   if(esc == NULL){
359     return;
360   }
361   add_phi_and_eta_recurse(a, esc, prefix, pfxlen, phi, follow, eta, 0);
362 }
363 
364 // accept any digit and transition to a numeric node. |e| is the culmination of
365 // the prefix before the numeric. |follow| is the successor of the numeric.
366 // here's our approach:
367 //  - find a link to a numeric from e. there can only be one node (thought it
368 //     might have many links), so we can use the first one we find.
369 //  - if there is no such numeric node linked from e, create one.
370 //     (FIXME if all ten digits are occupied, what would we do?)
371 //  - chosen numeric node is φ.
372 //  - if an appropriate follow node exists linked from φ, choose it as ή.
373 //  - otherwise, create a new ή and link it from φ.
374 //  - walk from the top, finding all possible prefixes of φ.
375 //  - at each, link all unused digits to φ.
376 //  - from each that is also a possible prefix of ή, link ή.
377 static esctrie*
link_numeric(automaton * a,const char * prefix,int pfxlen,esctrie * e,unsigned char follow)378 link_numeric(automaton* a, const char* prefix, int pfxlen,
379              esctrie* e, unsigned char follow){
380   logdebug("adding numeric with follow %c following %*.*s\n", follow, pfxlen, pfxlen, prefix);
381   unsigned phiidx = get_phi_node(a, e);
382   if(phiidx == 0){
383     return NULL;
384   }
385   esctrie* phi = esctrie_from_idx(a, phiidx);
386   // invalidates phi
387   unsigned etaidx = get_eta_node(a, phi, follow);
388   if(etaidx == 0){
389     return NULL;
390   }
391   phi = esctrie_from_idx(a, phiidx);
392   esctrie* eta = esctrie_from_idx(a, etaidx);
393   logtrace("phi node: %u->%u\n", esctrie_idx(a, e), esctrie_idx(a, phi));
394   logtrace("eta node: %u philink[%c]: %u\n", esctrie_idx(a, eta), follow, phi->trie[follow]);
395   // eta is now bound to phi, and phi links something at all digits, but no
396   // other links are guaranteed. walk the automaton, finding all possible
397   // prefixes of φ (and linking to φ) and all possible prefixes of ή (and
398   // linking them to ή).
399   add_phi_and_eta(a, prefix, pfxlen, phi, follow, eta);
400   return eta;
401 }
402 
403 static esctrie*
insert_path(automaton * a,const char * seq)404 insert_path(automaton* a, const char* seq){
405   if(a->escapes == 0){
406     if((a->escapes = create_esctrie_node(a, 0)) == 0){
407       return NULL;
408     }
409   }
410   esctrie* eptr = esctrie_from_idx(a, a->escapes);
411   bool inescape = false;
412   const char* seqstart = seq;
413   unsigned char c;
414   while( (c = *seq++) ){
415     if(c == '\\'){
416       if(inescape){
417         logerror("illegal escape: \\\n");
418         return NULL;
419       }
420       inescape = true;
421     }else if(inescape){
422       if(c == 'N'){
423         // a numeric must be followed by some terminator
424         if(!*seq){
425           logerror("illegal numeric terminator\n");
426           return NULL;
427         }
428         c = *seq++;
429         eptr = link_numeric(a, seqstart, seq - 3 - seqstart, eptr, c);
430         if(eptr == NULL){
431           return NULL;
432         }
433       }else if(c == 'S' || c == 'R'){
434         // strings always end with ST ("\e\\") or at least ("\e")
435         if((eptr = esctrie_make_string(a, eptr, c == 'R')) == NULL){
436           return NULL;
437         }
438         return eptr;
439       }else if(c == 'D'){ // drain (kleene closure)
440         // a kleene must be followed by some terminator
441         if(!*seq){
442           logerror("illegal kleene terminator\n");
443           return NULL;
444         }
445         c = *seq++;
446         eptr = link_kleene(a, eptr, c);
447         if(eptr == NULL){
448           return NULL;
449         }
450       }else{
451         logerror("illegal escape: %u\n", c);
452         return NULL;
453       }
454       inescape = false;
455     }else{ // fixed character
456       unsigned eidx = esctrie_idx(a, eptr);
457       // invalidates eptr
458       if(eptr->trie[c] == 0 || eptr->trie[c] == eptr->kleene){
459         unsigned tidx = create_esctrie_node(a, 0);
460         if(tidx == 0){
461           return NULL;
462         }
463         eptr = esctrie_from_idx(a, eidx);
464         eptr->trie[c] = tidx;
465       }else if(esctrie_from_idx(a, eptr->trie[c])->ntype == NODE_NUMERIC){
466         // punch a hole through the numeric loop. create a new one, and fill
467         // it in with the existing target.
468         struct esctrie* newe;
469         // invalidates eptr
470         if((newe = esctrie_from_idx(a, create_esctrie_node(a, 0))) == 0){
471           return NULL;
472         }
473         eptr = esctrie_from_idx(a, eidx);
474         for(int i = 0 ; i < 0x80 ; ++i){
475           newe->trie[i] = esctrie_from_idx(a, eptr->trie[c])->trie[i];
476         }
477         eptr->trie[c] = esctrie_idx(a, newe);
478       }
479       eptr = esctrie_from_idx(a, eidx);
480       eptr = esctrie_from_idx(a, eptr->trie[c]);
481       logtrace("added fixed %c %u as %u\n", c, c, esctrie_idx(a, eptr));
482     }
483   }
484   if(inescape){
485     logerror("illegal escape at end of line\n");
486     return NULL;
487   }
488   return eptr;
489 }
490 
491 // add a cflow path to the automaton
inputctx_add_cflow(automaton * a,const char * seq,triefunc fxn)492 int inputctx_add_cflow(automaton* a, const char* seq, triefunc fxn){
493   esctrie* eptr = insert_path(a, seq);
494   if(eptr == NULL){
495     return -1;
496   }
497   free(eptr->trie);
498   eptr->trie = NULL;
499   return esctrie_make_function(eptr, fxn);
500 }
501 
502 // multiple input escapes might map to the same input
inputctx_add_input_escape(automaton * a,const char * esc,uint32_t special,unsigned shift,unsigned ctrl,unsigned alt)503 int inputctx_add_input_escape(automaton* a, const char* esc, uint32_t special,
504                               unsigned shift, unsigned ctrl, unsigned alt){
505   if(esc[0] != NCKEY_ESC || strlen(esc) < 2){ // assume ESC prefix + content
506     logerror("not an escape (0x%x)\n", special);
507     return -1;
508   }
509   esctrie* eptr = insert_path(a, esc + 1);
510   if(eptr == NULL){
511     return -1;
512   }
513   // it appears that multiple keys can be mapped to the same escape string. as
514   // an example, see "kend" and "kc1" in st ("simple term" from suckless) :/.
515   if(eptr->ni.id){ // already had one here!
516     if(eptr->ni.id != special){
517       logwarn("already added escape (got 0x%x, wanted 0x%x)\n", eptr->ni.id, special);
518     }
519   }else{
520     eptr->ni.id = special;
521     eptr->ni.shift = shift;
522     eptr->ni.ctrl = ctrl;
523     eptr->ni.alt = alt;
524     eptr->ni.y = 0;
525     eptr->ni.x = 0;
526     logdebug("added 0x%08x to %u\n", special, esctrie_idx(a, eptr));
527   }
528   return 0;
529 }
530 
531 // returns -1 for non-match, 0 for match, 1 for acceptance. if we are in the
532 // middle of a sequence, and receive an escape, *do not call this*, but
533 // instead call reset_automaton() after replaying the used characters to the
534 // bulk input buffer, and *then* call this with the escape.
walk_automaton(automaton * a,struct inputctx * ictx,unsigned candidate,ncinput * ni)535 int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
536                    ncinput* ni){
537   if(candidate >= 0x80){
538     logerror("eight-bit char %u in control sequence\n", candidate);
539     return -1;
540   }
541   esctrie* e = esctrie_from_idx(a, a->state);
542   // we ought not have been called for an escape with any state!
543   if(candidate == 0x1b && !a->instring){
544     assert(NULL == e);
545     a->state = a->escapes;
546     return 0;
547   }
548   if(e->ntype == NODE_STRING){
549     if(candidate == 0x1b || candidate == 0x07){
550       a->state = e->trie[candidate];
551       a->instring = 0;
552     }
553     e = esctrie_from_idx(a, a->state);
554     if(e->ntype == NODE_FUNCTION){ // for the 0x07s of the world
555       if(e->fxn == NULL){
556         return 2;
557       }
558       return e->fxn(ictx);
559     }
560     return 0;
561   }
562   if((a->state = e->trie[candidate]) == 0){
563     if(esctrie_idx(a, e) == a->escapes){
564       memset(ni, 0, sizeof(*ni));
565       ni->id = candidate;
566       ni->alt = true;
567       return 1;
568     }
569     loginfo("unexpected transition on %u[%u]\n",
570             esctrie_idx(a, e), candidate);
571     return -1;
572   }
573   e = esctrie_from_idx(a, a->state);
574   // initialize any node we've just stepped into
575   switch(e->ntype){
576     case NODE_NUMERIC:
577       break;
578     case NODE_STRING:
579       a->instring = 1;
580       break;
581     case NODE_SPECIAL:
582       if(e->ni.id){
583         memcpy(ni, &e->ni, sizeof(*ni));
584         return 1;
585       }
586       break;
587     case NODE_FUNCTION:
588       if(e->fxn == NULL){
589         return 2;
590       }
591       return e->fxn(ictx);
592       break;
593   }
594   return 0;
595 }
596