1 #include "automaton.h"
2 #include "internal.h"
3
4 // the input automaton, walked for all escape sequences. an escape sequence is
5 // everything from an escape through recognized termination of that escape, or
6 // abort of the sequence via another escape, save the case of DCS sequences
7 // (those beginning with Escape-P), which are terminated by the ST sequence
8 // Escape-\. in the case of an aborted sequence, the sequence in its entirety
9 // is replayed as regular input. regular input is not driven through this
10 // automaton.
11 //
12 // one complication is that the user can just press escape themselves, followed
13 // by arbitrary other keypresses. when input is redirected from some source
14 // other than the connected terminal, this is no problem: we know control
15 // sequences to be coming in from the connected terminal, and everything else
16 // is bulk input.
17
18 // we assumed escapes can only be composed of 7-bit chars
19 typedef struct esctrie {
20 // if non-NULL, this is the next level of radix-128 trie. it is NULL on
21 // accepting nodes, since no valid control sequence is a prefix of another
22 // valid control sequence. links are 1-biased (0 is NULL).
23 unsigned* trie;
24 enum {
25 NODE_SPECIAL, // an accepting node, or pure transit (if ni.id == 0)
26 NODE_NUMERIC, // accumulates a number
27 NODE_STRING, // accumulates a string
28 NODE_FUNCTION, // invokes a function
29 } ntype;
30 ncinput ni; // composed key terminating here
31 triefunc fxn; // function to call on match
32 unsigned kleene; // idx of kleene match
33 } esctrie;
34
35 // get node corresponding to 1-biased index
36 static inline esctrie*
esctrie_from_idx(const automaton * a,unsigned idx)37 esctrie_from_idx(const automaton* a, unsigned idx){
38 if(idx == 0){
39 return NULL;
40 }
41 return a->nodepool + (idx - 1);
42 }
43
44 // return 1-biased index of node in pool
45 static inline unsigned
esctrie_idx(const automaton * a,const esctrie * e)46 esctrie_idx(const automaton* a, const esctrie* e){
47 return e - a->nodepool + 1;
48 }
49
esctrie_id(const esctrie * e)50 uint32_t esctrie_id(const esctrie* e){
51 return e->ni.id;
52 }
53
54 // returns the idx of the new node, or 0 on failure (idx is 1-biased).
55 // *invalidates any existing escnode pointers!*
56 static inline unsigned
create_esctrie_node(automaton * a,int special)57 create_esctrie_node(automaton* a, int special){
58 if(a->poolused == a->poolsize){
59 unsigned newsize = a->poolsize ? a->poolsize * 2 : 512;
60 esctrie* tmp = realloc(a->nodepool, sizeof(*a->nodepool) * newsize);
61 if(tmp == NULL){
62 return 0;
63 }
64 a->nodepool = tmp;
65 a->poolsize = newsize;
66 }
67 esctrie* e = &a->nodepool[a->poolused++];
68 memset(e, 0, sizeof(*e));
69 e->ntype = NODE_SPECIAL;
70 if((e->ni.id = special) == 0){
71 const size_t tsize = sizeof(*e->trie) * 0x80;
72 if((e->trie = malloc(tsize)) == NULL){
73 --a->poolused;
74 return 0;
75 }
76 memset(e->trie, 0, tsize);
77 }
78 return esctrie_idx(a, e);
79 }
80
input_free_esctrie(automaton * a)81 void input_free_esctrie(automaton* a){
82 a->escapes = 0;
83 a->poolsize = 0;
84 for(unsigned i = 0 ; i < a->poolused ; ++i){
85 free(a->nodepool[i].trie);
86 }
87 free(a->nodepool);
88 a->poolused = 0;
89 a->nodepool = NULL;
90 }
91
92 static int
esctrie_make_kleene(automaton * a,esctrie * e,unsigned follow,esctrie * term)93 esctrie_make_kleene(automaton* a, esctrie* e, unsigned follow, esctrie* term){
94 if(e->ntype != NODE_SPECIAL){
95 logerror("can't make node type %d string\n", e->ntype);
96 return -1;
97 }
98 for(unsigned i = 0 ; i < 0x80 ; ++i){
99 if(i == follow){
100 e->trie[i] = esctrie_idx(a, term);
101 }else if(e->trie[i] == 0){
102 e->trie[i] = esctrie_idx(a, e);
103 }
104 }
105 return 0;
106 }
107
108 static int
esctrie_make_function(esctrie * e,triefunc fxn)109 esctrie_make_function(esctrie* e, triefunc fxn){
110 if(e->ntype != NODE_SPECIAL){
111 logerror("can't make node type %d function\n", e->ntype);
112 return -1;
113 }
114 if(e->trie){
115 logerror("can't make followed function\n");
116 return -1;
117 }
118 e->ntype = NODE_FUNCTION;
119 e->fxn = fxn;
120 return 0;
121 }
122
123 static esctrie*
esctrie_make_string(automaton * a,esctrie * e,unsigned rxvtstyle)124 esctrie_make_string(automaton* a, esctrie* e, unsigned rxvtstyle){
125 if(e->ntype == NODE_STRING){
126 logerror("repeated string node\n");
127 return NULL;
128 }
129 if(e->ntype != NODE_SPECIAL){
130 logerror("can't make node type %d string\n", e->ntype);
131 return NULL;
132 }
133 for(int i = 0 ; i < 0x80 ; ++i){
134 if(!isprint(i)){
135 continue;
136 }
137 if(e->trie[i]){
138 logerror("can't make %c-followed string\n", i);
139 return NULL;
140 }
141 }
142 esctrie* newe = esctrie_from_idx(a, create_esctrie_node(a, 0));
143 if(newe == NULL){
144 return NULL;
145 }
146 for(int i = 0 ; i < 0x80 ; ++i){
147 if(!isprint(i)){
148 continue;
149 }
150 e->trie[i] = esctrie_idx(a, newe);
151 }
152 e = newe;
153 e->ntype = NODE_STRING;
154 for(int i = 0 ; i < 0x80 ; ++i){
155 if(!isprint(i)){
156 continue;
157 }
158 e->trie[i] = esctrie_idx(a, newe);
159 }
160 if(rxvtstyle){ // ends with bare ESC, not BEL/ST
161 if((e->trie[0x1b] = create_esctrie_node(a, 0)) == 0){
162 return NULL;
163 }
164 e = esctrie_from_idx(a, e->trie[0x1b]);
165 e->ni.id = 0;
166 e->ntype = NODE_SPECIAL;
167 }else{
168 if((e->trie[0x07] = create_esctrie_node(a, NCKEY_INVALID)) == 0){
169 return NULL;
170 }
171 esctrie* term = esctrie_from_idx(a, e->trie[0x07]);
172 if((e->trie[0x1b] = create_esctrie_node(a, 0)) == 0){
173 return NULL;
174 }
175 e = esctrie_from_idx(a, e->trie[0x1b]);
176 e->trie['\\'] = esctrie_idx(a, term);
177 term->ni.id = 0;
178 term->ntype = NODE_SPECIAL;
179 e = term;
180 }
181 logdebug("made string: %u\n", esctrie_idx(a, e));
182 return e;
183 }
184
185 static esctrie*
link_kleene(automaton * a,esctrie * e,unsigned follow)186 link_kleene(automaton* a, esctrie* e, unsigned follow){
187 unsigned eidx = esctrie_idx(a, e);
188 if(e->kleene){
189 return a->nodepool + e->kleene;
190 }
191 // invalidates e
192 unsigned termidx = create_esctrie_node(a, 0);
193 unsigned targidx = create_esctrie_node(a, 0);
194 esctrie* term = esctrie_from_idx(a, termidx);
195 esctrie* targ = esctrie_from_idx(a, targidx);
196 if(targ == NULL){
197 return NULL;
198 }
199 if(term == NULL){
200 return NULL;
201 }
202 if(esctrie_make_kleene(a, targ, follow, term)){
203 return NULL;
204 }
205 e = esctrie_from_idx(a, eidx);
206 // fill in all NULL numeric links with the new target
207 for(unsigned int i = 0 ; i < 0x80 ; ++i){
208 if(i == follow){
209 if(e->trie[i]){
210 logerror("drain terminator already registered\n");
211 return NULL;
212 }
213 e->trie[follow] = esctrie_idx(a, term);
214 }else if(e->trie[i] == 0){
215 e->trie[i] = esctrie_idx(a, targ);
216 }
217 }
218 targ->kleene = esctrie_idx(a, targ);
219 return esctrie_from_idx(a, e->trie[follow]);
220 }
221
222 // phase 1 of the numeric algorithm; find a φ node on e. not sure what
223 // to do if we have non-φ links at every digit...punt for now FIXME.
224 static inline unsigned
get_phi_node(automaton * a,esctrie * e)225 get_phi_node(automaton* a, esctrie* e){
226 // find a linked NODE_NUMERIC, if one exists. we'll want to reuse it.
227 int nonphis = 0;
228 esctrie* targ;
229 for(int i = '0' ; i <= '9' ; ++i){
230 if( (targ = esctrie_from_idx(a, e->trie[i])) ){
231 if(targ->ntype == NODE_NUMERIC){
232 logtrace("found existing phi node %u[%c]->%u\n", esctrie_idx(a, e), i, esctrie_idx(a, targ));
233 break;
234 }else{
235 ++nonphis;
236 targ = NULL;
237 }
238 }
239 }
240 // we either have a numeric target, or will make one now. if we create a new
241 // one, be sure to mark it numeric, and add all digit links back to itself.
242 if(targ == NULL){
243 if(nonphis == 10){
244 logerror("ten non-phi links from %u\n", esctrie_idx(a, e));
245 return 0;
246 }
247 if((targ = esctrie_from_idx(a, create_esctrie_node(a, 0))) == 0){
248 return 0;
249 }
250 targ->ntype = NODE_NUMERIC;
251 for(int i = '0' ; i <= '9' ; ++i){
252 targ->trie[i] = esctrie_idx(a, targ);
253 }
254 }
255 assert(NODE_NUMERIC == targ->ntype);
256 return esctrie_idx(a, targ);
257 }
258
259 // phase 2 of the numeric algorithm; find a ή node for |successor| on |phi|.
260 static inline unsigned
get_eta_node(automaton * a,esctrie * phi,unsigned successor)261 get_eta_node(automaton* a, esctrie* phi, unsigned successor){
262 unsigned phiidx = esctrie_idx(a, phi);
263 unsigned etaidx = phi->trie[successor];
264 esctrie* eta = esctrie_from_idx(a, etaidx);
265 if(eta == NULL){
266 // invalidates phi
267 if((eta = esctrie_from_idx(a, create_esctrie_node(a, 0))) == NULL){
268 return 0;
269 }
270 phi = esctrie_from_idx(a, phiidx);
271 phi->trie[successor] = esctrie_idx(a, eta);
272 }
273 return esctrie_idx(a, eta);
274 }
275
276 // |e| is a known-standard node reached by our prefix; go ahead and prep both
277 // phi and eta links from it.
278 static inline void
add_phi_and_eta_chain(const automaton * a,esctrie * e,unsigned phi,unsigned follow,unsigned eta)279 add_phi_and_eta_chain(const automaton *a, esctrie* e, unsigned phi,
280 unsigned follow, unsigned eta){
281 //logtrace("working with %u phi: %u follow: %u eta: %u\n", esctrie_idx(a, e), phi, follow, eta);
282 for(int i = '0' ; i <= '9' ; ++i){
283 esctrie* chain = esctrie_from_idx(a, e->trie[i]);
284 if(chain == NULL){
285 //logdebug("linking %u[%d] to %u\n", esctrie_idx(a, e), i, phi);
286 e->trie[i] = phi;
287 }else if(chain->ntype == NODE_SPECIAL){
288 //logdebug("propagating along %u[%c]\n", e->trie[i], i);
289 add_phi_and_eta_chain(a, esctrie_from_idx(a, e->trie[i]), phi, follow, eta);
290 }
291 }
292 if(e->trie[follow] == 0){
293 //logdebug("linking %u[%u] to %u\n", esctrie_idx(a, e), follow, eta);
294 e->trie[follow] = eta;
295 }
296 }
297
298 // phase 3 of the numeric algorithm: walk the automaton, finding all nodes
299 // which are prefixes of phi (all nodes matching the prefix, and all numeric
300 // non-phi chains from those nodes) and linking them to phi, and finding all
301 // nodes which are prefixes of eta (all numeric non-phi chains from the
302 // previous set) and linking them to eta. |e| is the path thus far.
303 static inline void
add_phi_and_eta_recurse(automaton * a,esctrie * e,const char * prefix,int pfxlen,esctrie * phi,unsigned follow,esctrie * eta,unsigned inphi)304 add_phi_and_eta_recurse(automaton* a, esctrie* e, const char* prefix,
305 int pfxlen, esctrie* phi, unsigned follow,
306 esctrie* eta, unsigned inphi){
307 //logtrace("working with %u %d prefix [%*.*s]\n", esctrie_idx(a, e), pfxlen, pfxlen, pfxlen, prefix);
308 // if pfxlen == 0, we found a match for our fixed prefix. start adding phi
309 // links whereever we can. where we find chained numerics, add an eta link.
310 if(pfxlen == 0){
311 add_phi_and_eta_chain(a, e, esctrie_idx(a, phi), follow, esctrie_idx(a, eta));
312 return;
313 }
314 // when we hit a \N in the prefix, we must recurse along all digit links
315 if(*prefix == '\\'){
316 ++prefix;
317 --pfxlen;
318 if(*prefix != 'N'){
319 logerror("illegal wildcard in prefix %c\n", *prefix);
320 return;
321 }
322 ++prefix;
323 --pfxlen;
324 for(int i = '0' ; i <= '9' ; ++i){
325 if(e->trie[i] == 0){
326 //logdebug("linking %u[%d] to %u\n", esctrie_idx(a, e), i, esctrie_idx(a, phi));
327 e->trie[i] = esctrie_idx(a, phi);
328 }else{
329 add_phi_and_eta_recurse(a, esctrie_from_idx(a, e->trie[i]),
330 prefix, pfxlen, phi, follow, eta, 1);
331 }
332 }
333 }else{
334 if(inphi){
335 for(int i = '0' ; i <= '9' ; ++i){
336 if(e->trie[i] == 0){
337 //logdebug("linking %u[%d] to %u\n", esctrie_idx(a, e), i, esctrie_idx(a, phi));
338 e->trie[i] = esctrie_idx(a, phi);
339 }else if(e->trie[i] != esctrie_idx(a, e)){
340 add_phi_and_eta_recurse(a, esctrie_from_idx(a, e->trie[i]),
341 prefix, pfxlen, phi, follow, eta, 1);
342 }
343 }
344 }
345 unsigned char p = *prefix;
346 if(e->trie[p]){
347 add_phi_and_eta_recurse(a, esctrie_from_idx(a, e->trie[p]),
348 prefix + 1, pfxlen - 1, phi, follow, eta, 0);
349 }
350 }
351 }
352
353 // |prefix| does *not* lead with an escape, and does not include the numeric.
354 static inline void
add_phi_and_eta(automaton * a,const char * prefix,size_t pfxlen,esctrie * phi,unsigned follow,esctrie * eta)355 add_phi_and_eta(automaton* a, const char* prefix, size_t pfxlen,
356 esctrie* phi, unsigned follow, esctrie* eta){
357 esctrie* esc = esctrie_from_idx(a, a->escapes);
358 if(esc == NULL){
359 return;
360 }
361 add_phi_and_eta_recurse(a, esc, prefix, pfxlen, phi, follow, eta, 0);
362 }
363
364 // accept any digit and transition to a numeric node. |e| is the culmination of
365 // the prefix before the numeric. |follow| is the successor of the numeric.
366 // here's our approach:
367 // - find a link to a numeric from e. there can only be one node (thought it
368 // might have many links), so we can use the first one we find.
369 // - if there is no such numeric node linked from e, create one.
370 // (FIXME if all ten digits are occupied, what would we do?)
371 // - chosen numeric node is φ.
372 // - if an appropriate follow node exists linked from φ, choose it as ή.
373 // - otherwise, create a new ή and link it from φ.
374 // - walk from the top, finding all possible prefixes of φ.
375 // - at each, link all unused digits to φ.
376 // - from each that is also a possible prefix of ή, link ή.
377 static esctrie*
link_numeric(automaton * a,const char * prefix,int pfxlen,esctrie * e,unsigned char follow)378 link_numeric(automaton* a, const char* prefix, int pfxlen,
379 esctrie* e, unsigned char follow){
380 logdebug("adding numeric with follow %c following %*.*s\n", follow, pfxlen, pfxlen, prefix);
381 unsigned phiidx = get_phi_node(a, e);
382 if(phiidx == 0){
383 return NULL;
384 }
385 esctrie* phi = esctrie_from_idx(a, phiidx);
386 // invalidates phi
387 unsigned etaidx = get_eta_node(a, phi, follow);
388 if(etaidx == 0){
389 return NULL;
390 }
391 phi = esctrie_from_idx(a, phiidx);
392 esctrie* eta = esctrie_from_idx(a, etaidx);
393 logtrace("phi node: %u->%u\n", esctrie_idx(a, e), esctrie_idx(a, phi));
394 logtrace("eta node: %u philink[%c]: %u\n", esctrie_idx(a, eta), follow, phi->trie[follow]);
395 // eta is now bound to phi, and phi links something at all digits, but no
396 // other links are guaranteed. walk the automaton, finding all possible
397 // prefixes of φ (and linking to φ) and all possible prefixes of ή (and
398 // linking them to ή).
399 add_phi_and_eta(a, prefix, pfxlen, phi, follow, eta);
400 return eta;
401 }
402
403 static esctrie*
insert_path(automaton * a,const char * seq)404 insert_path(automaton* a, const char* seq){
405 if(a->escapes == 0){
406 if((a->escapes = create_esctrie_node(a, 0)) == 0){
407 return NULL;
408 }
409 }
410 esctrie* eptr = esctrie_from_idx(a, a->escapes);
411 bool inescape = false;
412 const char* seqstart = seq;
413 unsigned char c;
414 while( (c = *seq++) ){
415 if(c == '\\'){
416 if(inescape){
417 logerror("illegal escape: \\\n");
418 return NULL;
419 }
420 inescape = true;
421 }else if(inescape){
422 if(c == 'N'){
423 // a numeric must be followed by some terminator
424 if(!*seq){
425 logerror("illegal numeric terminator\n");
426 return NULL;
427 }
428 c = *seq++;
429 eptr = link_numeric(a, seqstart, seq - 3 - seqstart, eptr, c);
430 if(eptr == NULL){
431 return NULL;
432 }
433 }else if(c == 'S' || c == 'R'){
434 // strings always end with ST ("\e\\") or at least ("\e")
435 if((eptr = esctrie_make_string(a, eptr, c == 'R')) == NULL){
436 return NULL;
437 }
438 return eptr;
439 }else if(c == 'D'){ // drain (kleene closure)
440 // a kleene must be followed by some terminator
441 if(!*seq){
442 logerror("illegal kleene terminator\n");
443 return NULL;
444 }
445 c = *seq++;
446 eptr = link_kleene(a, eptr, c);
447 if(eptr == NULL){
448 return NULL;
449 }
450 }else{
451 logerror("illegal escape: %u\n", c);
452 return NULL;
453 }
454 inescape = false;
455 }else{ // fixed character
456 unsigned eidx = esctrie_idx(a, eptr);
457 // invalidates eptr
458 if(eptr->trie[c] == 0 || eptr->trie[c] == eptr->kleene){
459 unsigned tidx = create_esctrie_node(a, 0);
460 if(tidx == 0){
461 return NULL;
462 }
463 eptr = esctrie_from_idx(a, eidx);
464 eptr->trie[c] = tidx;
465 }else if(esctrie_from_idx(a, eptr->trie[c])->ntype == NODE_NUMERIC){
466 // punch a hole through the numeric loop. create a new one, and fill
467 // it in with the existing target.
468 struct esctrie* newe;
469 // invalidates eptr
470 if((newe = esctrie_from_idx(a, create_esctrie_node(a, 0))) == 0){
471 return NULL;
472 }
473 eptr = esctrie_from_idx(a, eidx);
474 for(int i = 0 ; i < 0x80 ; ++i){
475 newe->trie[i] = esctrie_from_idx(a, eptr->trie[c])->trie[i];
476 }
477 eptr->trie[c] = esctrie_idx(a, newe);
478 }
479 eptr = esctrie_from_idx(a, eidx);
480 eptr = esctrie_from_idx(a, eptr->trie[c]);
481 logtrace("added fixed %c %u as %u\n", c, c, esctrie_idx(a, eptr));
482 }
483 }
484 if(inescape){
485 logerror("illegal escape at end of line\n");
486 return NULL;
487 }
488 return eptr;
489 }
490
491 // add a cflow path to the automaton
inputctx_add_cflow(automaton * a,const char * seq,triefunc fxn)492 int inputctx_add_cflow(automaton* a, const char* seq, triefunc fxn){
493 esctrie* eptr = insert_path(a, seq);
494 if(eptr == NULL){
495 return -1;
496 }
497 free(eptr->trie);
498 eptr->trie = NULL;
499 return esctrie_make_function(eptr, fxn);
500 }
501
502 // multiple input escapes might map to the same input
inputctx_add_input_escape(automaton * a,const char * esc,uint32_t special,unsigned shift,unsigned ctrl,unsigned alt)503 int inputctx_add_input_escape(automaton* a, const char* esc, uint32_t special,
504 unsigned shift, unsigned ctrl, unsigned alt){
505 if(esc[0] != NCKEY_ESC || strlen(esc) < 2){ // assume ESC prefix + content
506 logerror("not an escape (0x%x)\n", special);
507 return -1;
508 }
509 esctrie* eptr = insert_path(a, esc + 1);
510 if(eptr == NULL){
511 return -1;
512 }
513 // it appears that multiple keys can be mapped to the same escape string. as
514 // an example, see "kend" and "kc1" in st ("simple term" from suckless) :/.
515 if(eptr->ni.id){ // already had one here!
516 if(eptr->ni.id != special){
517 logwarn("already added escape (got 0x%x, wanted 0x%x)\n", eptr->ni.id, special);
518 }
519 }else{
520 eptr->ni.id = special;
521 eptr->ni.shift = shift;
522 eptr->ni.ctrl = ctrl;
523 eptr->ni.alt = alt;
524 eptr->ni.y = 0;
525 eptr->ni.x = 0;
526 logdebug("added 0x%08x to %u\n", special, esctrie_idx(a, eptr));
527 }
528 return 0;
529 }
530
531 // returns -1 for non-match, 0 for match, 1 for acceptance. if we are in the
532 // middle of a sequence, and receive an escape, *do not call this*, but
533 // instead call reset_automaton() after replaying the used characters to the
534 // bulk input buffer, and *then* call this with the escape.
walk_automaton(automaton * a,struct inputctx * ictx,unsigned candidate,ncinput * ni)535 int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
536 ncinput* ni){
537 if(candidate >= 0x80){
538 logerror("eight-bit char %u in control sequence\n", candidate);
539 return -1;
540 }
541 esctrie* e = esctrie_from_idx(a, a->state);
542 // we ought not have been called for an escape with any state!
543 if(candidate == 0x1b && !a->instring){
544 assert(NULL == e);
545 a->state = a->escapes;
546 return 0;
547 }
548 if(e->ntype == NODE_STRING){
549 if(candidate == 0x1b || candidate == 0x07){
550 a->state = e->trie[candidate];
551 a->instring = 0;
552 }
553 e = esctrie_from_idx(a, a->state);
554 if(e->ntype == NODE_FUNCTION){ // for the 0x07s of the world
555 if(e->fxn == NULL){
556 return 2;
557 }
558 return e->fxn(ictx);
559 }
560 return 0;
561 }
562 if((a->state = e->trie[candidate]) == 0){
563 if(esctrie_idx(a, e) == a->escapes){
564 memset(ni, 0, sizeof(*ni));
565 ni->id = candidate;
566 ni->alt = true;
567 return 1;
568 }
569 loginfo("unexpected transition on %u[%u]\n",
570 esctrie_idx(a, e), candidate);
571 return -1;
572 }
573 e = esctrie_from_idx(a, a->state);
574 // initialize any node we've just stepped into
575 switch(e->ntype){
576 case NODE_NUMERIC:
577 break;
578 case NODE_STRING:
579 a->instring = 1;
580 break;
581 case NODE_SPECIAL:
582 if(e->ni.id){
583 memcpy(ni, &e->ni, sizeof(*ni));
584 return 1;
585 }
586 break;
587 case NODE_FUNCTION:
588 if(e->fxn == NULL){
589 return 2;
590 }
591 return e->fxn(ictx);
592 break;
593 }
594 return 0;
595 }
596