1 /*
2  * Copyright © 2015 David Herrmann <dh.herrmann@gmail.com>
3  * Copyright © 2017, 2018 Christian Persch
4  *
5  * This library is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published
7  * by the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with this library.  If not, see <https://www.gnu.org/licenses/>.
17  */
18 
19 #include "config.h"
20 
21 #include "parser.hh"
22 
23 #include <cstdio>
24 #include <cstring>
25 #include <cstdlib>
26 #include <cerrno>
27 
28 #include <glib.h>
29 
30 #include "parser-charset-tables.hh"
31 
32 #ifdef PARSER_INCLUDE_NOP
33 #define _VTE_NOQ(...) _VTE_SEQ(__VA_ARGS__)
34 #else
35 #define _VTE_NOQ(...)
36 #endif
37 
38 /*
39  * Terminal Parser
40  * This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The
41  * parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST
42  * control sequences and generic escape sequences.
43  * The parser itself does not perform any actions but lets the caller react to
44  * detected sequences.
45  *
46  * This parser is mostly DEC VT100+ compatible; known differences are:
47  *
48  * * DEC only recognises up to 16 parameters; vte up to 32 (and that can be easily
49  *   extended)
50  *
51  * * DEC's parameter values range is 0..16384; vte supports 0..65535 (16-bit range).
52  *
 * * When the number of parameters exceeds that limit, DEC executes the function
54  *   with these parameters, ignoring the excessive parameters; vte ignores the
55  *   whole function instead.
56  *
57  * * DEC ignores CSI sequences with colon-separated parameters; vte implements colon-
58  *   separated parameters as subparameters (this is an extension taken from ITU-T T.416).
59  *
60  * * DEC executes format effector controls in CSI, OSC, DCS sequences as if the
61  *   control was received before the control sequence; vte only does this for CSI
62  *   sequences and ignores all controls except ESC and BEL in OSC control strings,
63  *   and passes all controls except ESC through to the control string in DCS sequences.
64  *
65  * * DEC only allows ST (either C0 or C1) to terminate OSC strings; vte allows
66  *   OSC to be terminated by BEL (this is a deprecated xterm extension).
67  *
68  * * DEC parses ESC Z as DECID, a deprecated function equivalent to DA1; vte
69  *   implements ECMA-48's SCI (single character introducer) instead.
70  */
71 
72 /*
73  * Command Parser
74  * The ctl-seq parser "vte_parser" only detects whole sequences, it does not
75  * detect the specific command. Once a sequence is parsed, the command-parsers
76  * are used to figure out their meaning.
77  */
78 
79 /*
80  * Intermediates (and, for CSI/DCS, the optional parameter character) are
81  * stored efficiently in an unsigned int. Intermediates can be 2/00..2/15,
82  * plus one value for 'no intermediate'; together that fits into 5 bits.
83  * Parameter character can be 'no parameter character', or one from
84  * 3/12..3/15; that fits into 3 bits.
85  *
 * In @seq.intermediates, the nth intermediate is stored with shift n * 5,
87  * plus (for CSI/DCS) an additional shift of 3 for the parameter character
88  * which is stored at bits 0..2.
89  *
90  * VTE_SEQ_PARAMETER(u) extracts the parameter character
91  *   of a CSI or DCS sequence
92  * VTE_SEQ_REMOVE_PARAMETER(u) extracts the intermediates
93  *   of a CSI or DCS sequence
94  * VTE_SEQ_INTERMEDIATE(u) extracts the first intermediate from an
95  *   intermediates value (for CSI/DCS, that must be without parameter
96  *   character, see VTE_SEQ_REMOVE_PARAMETER)
97  * VTE_SEQ_REMOVE_INTERMEDIATE(u) extracts the remaining intermediates
98  *   after the first one; use VTE_SEQ_INTERMEDIATE on its return value
99  *   to extract the 2nd intermediate, and so on
100  */
101 
102 #define VTE_SEQ_PARAMETER_BITS         (3)
103 #define VTE_SEQ_INTERMEDIATE_BITS      (5)
104 #define VTE_SEQ_INTERMEDIATE_MASK      ((1U << VTE_SEQ_INTERMEDIATE_BITS) - 1U)
105 #define VTE_SEQ_PARAMETER_MASK         ((1U << VTE_SEQ_PARAMETER_BITS) - 1U)
106 #define VTE_SEQ_PARAMETER(u)           ((u) & VTE_SEQ_PARAMETER_MASK)
107 #define VTE_SEQ_REMOVE_PARAMETER(u)    ((u) >> VTE_SEQ_PARAMETER_BITS)
108 #define VTE_SEQ_INTERMEDIATE(u)        ((u) & VTE_SEQ_INTERMEDIATE_MASK)
109 #define VTE_SEQ_REMOVE_INTERMEDIATE(u) ((u) >> VTE_SEQ_INTERMEDIATE_BITS)
110 #define VTE_MAKE_CHARSET(c,s)          ((c) | ((s) << VTE_CHARSET_SLOT_OFFSET))
111 
112 /*
113  * _VTE_SEQ_CODE_ESC(final, intermediates):
114  *
115  * Make a value combining the final character and the intermediates,
116  * to be used to match a sequence against known sequences.
117  *
118  * Since this is only used with NONE or HASH as first intermediate,
119  * we can reduce the size of the lookup table by slashing the least
120  * significant bit off.
121  *
122  * Final characters is 3/0..7/14, needing 7 bits.
123  */
124 #define _VTE_SEQ_CODE_ESC(f,i) (((f) - 0x30) | ((i) >> 1) << 7)
125 
126 /*
127  * _VTE_SEQ_CODE_COMBINE(parameter, intermediates)
128  *
129  * Combines intermediates and the parameter character into one
130  * value to be used when matching a sequence against known sequences.
131  */
132 #define _VTE_SEQ_CODE_COMBINE(p,i) ((p) | ((i) << VTE_SEQ_PARAMETER_BITS))
133 
134 /*
135  * _VTE_SEQ_CODE(final, intermediates):
136  *
137  * Make a value combining the final character and the intermediates,
138  * to be used to match a sequence against known sequences. Used for
139  * CSI and DCS sequences; use _VTE_SEQ_CODE_COMBINE to combine
140  * parameter and intermediates into one to pass as 2nd argument here.
141  *
142  * Final character is 4/0..7/14, needing 6 bits.
143  */
144 #define _VTE_SEQ_CODE(f,i) (((f) - 0x40) | ((i) << 6))
145 
146 /*
147  * @introducer: either a C1 control, or the final in the equivalent ESC F sequence
148  * @terminator: either a C1 control, or the final in the equivalent ESC F sequence
149  *
150  * Checks whether the OSC/DCS @introducer and the ST @terminator
151  * are from the same control set, i.e. both C0 or both C1.
152  *
153  * For OSC, this check allows C0 OSC with BEL-as-ST to pass, too.
154  */
155 static inline bool
parser_check_matching_controls(uint32_t introducer,uint32_t terminator)156 parser_check_matching_controls(uint32_t introducer,
157                                uint32_t terminator)
158 {
159         return ((introducer ^ terminator) & 0x80) == 0;
160 }
161 
162 static unsigned int
vte_parse_host_control(vte_seq_t const * seq)163 vte_parse_host_control(vte_seq_t const* seq)
164 {
165         switch (seq->terminator) {
166 #define _VTE_SEQ(cmd,type,f,pi,ni,i0,flags) case f: return VTE_CMD_##cmd;
167 #include "parser-c01.hh"
168 #undef _VTE_SEQ
169         default: return VTE_CMD_NONE;
170         }
171 }
172 
173 /* ECMA-35 § 14.1 specifies that the final character 7/14 always identifies
 * an empty set. Note that this does not apply to DRCS sets (§ 14.4),
175  * since § 13.3.3 says that all the Ft (4/0..7/14) bytes are private-use.
176  */
177 static inline constexpr unsigned int
charset_empty_or_none(uint32_t raw)178 charset_empty_or_none(uint32_t raw)
179 {
180         return raw == 0x7e ? VTE_CHARSET_EMPTY : VTE_CHARSET_NONE;
181 }
182 
183 static unsigned int
vte_parse_charset_94(uint32_t raw,unsigned int intermediates)184 vte_parse_charset_94(uint32_t raw,
185                      unsigned int intermediates)
186 {
187         assert (raw >= 0x30 && raw < 0x7f);
188 
189         unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
190 
191         switch (VTE_SEQ_INTERMEDIATE(intermediates)) {
192         case VTE_SEQ_INTERMEDIATE_NONE:
193                 if (remaining_intermediates == 0 &&
194                     raw < (0x30 + G_N_ELEMENTS(charset_graphic_94)))
195                         return charset_graphic_94[raw - 0x30];
196                 break;
197 
198         case VTE_SEQ_INTERMEDIATE_SPACE:
199                 return VTE_CHARSET_DRCS;
200 
201         case VTE_SEQ_INTERMEDIATE_BANG:
202                 if (remaining_intermediates == 0 &&
203                     raw >= 0x40 && (raw < 0x40 + G_N_ELEMENTS(charset_graphic_94_with_2_1)))
204                         return charset_graphic_94_with_2_1[raw - 0x40];
205                 break;
206 
207         case VTE_SEQ_INTERMEDIATE_DQUOTE:
208                 if (remaining_intermediates == 0 &&
209                     raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_with_2_2)))
210                         return charset_graphic_94_with_2_2[raw - 0x30];
211                 break;
212 
213         case VTE_SEQ_INTERMEDIATE_HASH:
214         case VTE_SEQ_INTERMEDIATE_CASH:
215                 break;
216 
217         case VTE_SEQ_INTERMEDIATE_PERCENT:
218                 if (remaining_intermediates == 0 &&
219                     raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_with_2_5)))
220                         return charset_graphic_94_with_2_5[raw - 0x30];
221                 break;
222 
223         case VTE_SEQ_INTERMEDIATE_AND:
224                 if (remaining_intermediates == 0 &&
225                     raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_with_2_6)))
226                         return charset_graphic_94_with_2_6[raw - 0x30];
227                 break;
228         }
229 
230         return charset_empty_or_none(raw);
231 }
232 
233 static unsigned int
vte_parse_charset_94_n(uint32_t raw,unsigned int intermediates)234 vte_parse_charset_94_n(uint32_t raw,
235                        unsigned int intermediates)
236 {
237         assert (raw >= 0x30 && raw < 0x7f);
238 
239         unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
240 
241         switch (VTE_SEQ_INTERMEDIATE(intermediates)) {
242         case VTE_SEQ_INTERMEDIATE_NONE:
243                 if (remaining_intermediates == 0 &&
244                     raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_n)))
245                         return charset_graphic_94_n[raw - 0x30];
246                 break;
247 
248         case VTE_SEQ_INTERMEDIATE_SPACE:
249                 return VTE_CHARSET_DRCS;
250 
251         case VTE_SEQ_INTERMEDIATE_BANG:
252                 if (remaining_intermediates == 0 &&
253                     raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_n_with_2_1)))
254                         return charset_graphic_94_n_with_2_1[raw - 0x30];
255                 break;
256         }
257 
258         return charset_empty_or_none(raw);
259 }
260 
261 static unsigned int
vte_parse_charset_96(uint32_t raw,unsigned int intermediates)262 vte_parse_charset_96(uint32_t raw,
263                      unsigned int intermediates)
264 {
265         assert (raw >= 0x30 && raw < 0x7f);
266 
267         unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
268 
269         switch (VTE_SEQ_INTERMEDIATE(intermediates)) {
270         case VTE_SEQ_INTERMEDIATE_NONE:
271                 if (remaining_intermediates == 0 &&
272                     raw < (0x30 + G_N_ELEMENTS(charset_graphic_96)))
273                         return charset_graphic_96[raw - 0x30];
274                 break;
275 
276         case VTE_SEQ_INTERMEDIATE_SPACE:
277                 return VTE_CHARSET_DRCS;
278         }
279 
280         return charset_empty_or_none(raw);
281 }
282 
283 static unsigned int
vte_parse_charset_96_n(uint32_t raw,unsigned int intermediates)284 vte_parse_charset_96_n(uint32_t raw,
285                        unsigned int intermediates)
286 {
287         if (VTE_SEQ_INTERMEDIATE(intermediates) == VTE_SEQ_INTERMEDIATE_SPACE)
288                 return VTE_CHARSET_DRCS;
289 
290         return charset_empty_or_none(raw);
291 }
292 
293 static unsigned int
vte_parse_charset_ocs(uint32_t raw,unsigned int intermediates)294 vte_parse_charset_ocs(uint32_t raw,
295                       unsigned int intermediates)
296 {
297         assert (raw >= 0x30 && raw < 0x7f);
298 
299         unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
300 
301         switch (VTE_SEQ_INTERMEDIATE(intermediates)) {
302         case VTE_SEQ_INTERMEDIATE_NONE:  /* OCS with standard return */
303                 if (remaining_intermediates == 0 &&
304                     raw >= 0x30 && raw < (0x30 + G_N_ELEMENTS(charset_ocs)))
305                         return charset_ocs[raw - 0x30];
306                 break;
307 
308         case VTE_SEQ_INTERMEDIATE_SPACE: /* OCS with standard return */
309                 if (remaining_intermediates == 0 &&
310                     raw >= 0x30 && raw < (0x30 + G_N_ELEMENTS(charset_ocs_with_2_0)))
311                         return charset_ocs_with_2_0[raw - 0x30];
312                 break;
313 
314         case VTE_SEQ_INTERMEDIATE_BANG ... VTE_SEQ_INTERMEDIATE_DOT: /* OCS with standard return */
315                 break;
316 
317         case VTE_SEQ_INTERMEDIATE_SLASH: /* OCS without standard return */
318                 if (remaining_intermediates == 0 &&
319                     raw >= 0x40 && raw < (0x40 + G_N_ELEMENTS(charset_ocs_with_2_15)))
320                         return charset_ocs_with_2_15[raw - 0x40];
321                 break;
322         }
323 
324         return VTE_CHARSET_NONE;
325 }
326 
327 static unsigned int
vte_parse_charset_control(uint32_t raw,unsigned int intermediates)328 vte_parse_charset_control(uint32_t raw,
329                           unsigned int intermediates)
330 {
331         assert (raw >= 0x30 && raw < 0x7f);
332 
333         unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
334 
335         switch (VTE_SEQ_INTERMEDIATE(intermediates)) {
336         case VTE_SEQ_INTERMEDIATE_BANG: /* C0 controls */
337                 if (remaining_intermediates == 0 &&
338                     raw >= 0x40 && raw < (0x40 + G_N_ELEMENTS(charset_control_c0)))
339                         return charset_control_c0[raw - 0x40];
340                 break;
341 
342         case VTE_SEQ_INTERMEDIATE_DQUOTE: /* C1 controls */
343                 if (remaining_intermediates == 0 &&
344                     raw >= 0x40 && raw < (0x40 + G_N_ELEMENTS(charset_control_c1)))
345                         return charset_control_c1[raw - 0x40];
346                 break;
347         }
348 
349         return charset_empty_or_none(raw);
350 }
351 
352 static unsigned int
vte_parse_host_escape(vte_seq_t const * seq,unsigned int * cs_out)353 vte_parse_host_escape(vte_seq_t const* seq,
354                       unsigned int *cs_out)
355 {
356         unsigned int intermediates = seq->intermediates;
357         unsigned int intermediate0 = VTE_SEQ_INTERMEDIATE(intermediates);
358 
359         /* Switch on the first intermediate */
360         switch (intermediate0) {
361         case VTE_SEQ_INTERMEDIATE_NONE:
362         case VTE_SEQ_INTERMEDIATE_HASH: {  /* Single control functions */
363                 switch (_VTE_SEQ_CODE_ESC(seq->terminator, intermediates)) {
364 #define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
365                         case _VTE_SEQ_CODE_ESC(f, VTE_SEQ_INTERMEDIATE_##i): return VTE_CMD_##cmd;
366 #include "parser-esc.hh"
367 #undef _VTE_SEQ
368                 default: return VTE_CMD_NONE;
369                 }
370                 break;
371         }
372 
373         case VTE_SEQ_INTERMEDIATE_SPACE:   /* Announce code structure */
374                 if (VTE_SEQ_REMOVE_INTERMEDIATE(intermediates) == 0)
375                         return VTE_CMD_ACS;
376                 break;
377 
378         case VTE_SEQ_INTERMEDIATE_BANG:    /* C0-designate */
379         case VTE_SEQ_INTERMEDIATE_DQUOTE:  /* C1-designate */
380                 *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_control(seq->terminator, intermediates),
381                                            intermediate0 - VTE_SEQ_INTERMEDIATE_BANG);
382                 return VTE_CMD_CnD;
383 
384         case VTE_SEQ_INTERMEDIATE_CASH: {  /* Designate multi-byte character sets */
385                 unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
386                 unsigned int intermediate1 = VTE_SEQ_INTERMEDIATE(remaining_intermediates);
387                 remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(remaining_intermediates);
388 
389                 /* Check the 2nd intermediate */
390                 switch (intermediate1) {
391                 case VTE_SEQ_INTERMEDIATE_NONE:
392                         /* For compatibility with an earlier version of ISO-2022,
393                          * ESC 2/4 4/0, ESC 2/4 4/1 and ESC 2/4 4/2 designate G0
394                          * sets (i.e., without the 2/8 as 2nd intermediate byte).
395                          */
396                         switch (seq->terminator) {
397                         case '@':
398                         case 'A':
399                         case 'B': /* G0-designate multibyte charset */
400                                 *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_94_n(seq->terminator,
401                                                                                   remaining_intermediates),
402                                                            0);
403                                 return VTE_CMD_GnDMm;
404                         }
405                         break;
406 
407                 case VTE_SEQ_INTERMEDIATE_POPEN:  /* G0-designate 94^n-set */
408                 case VTE_SEQ_INTERMEDIATE_PCLOSE: /* G1-designate 94^n-set */
409                 case VTE_SEQ_INTERMEDIATE_MULT:   /* G2-designate 94^n-set */
410                 case VTE_SEQ_INTERMEDIATE_PLUS:   /* G3-designate 94^n-set */
411                         *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_94_n(seq->terminator,
412                                                                           remaining_intermediates),
413                                                    intermediate1 - VTE_SEQ_INTERMEDIATE_POPEN);
414                         return VTE_CMD_GnDMm;
415 
416                 case VTE_SEQ_INTERMEDIATE_COMMA:  /* Reserved for future standardisation */
417                         break;
418 
419                 case VTE_SEQ_INTERMEDIATE_MINUS:  /* G1-designate 96^n-set */
420                 case VTE_SEQ_INTERMEDIATE_DOT:    /* G2-designate 96^n-set */
421                 case VTE_SEQ_INTERMEDIATE_SLASH:  /* G3-designate 96^n-set */
422                         *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_96_n(seq->terminator,
423                                                                           remaining_intermediates),
424                                                    intermediate1 - VTE_SEQ_INTERMEDIATE_COMMA);
425                         return VTE_CMD_GnDMm;
426                 }
427                 break;
428         }
429 
430         case VTE_SEQ_INTERMEDIATE_PERCENT: /* Designate other coding system */
431                 *cs_out = vte_parse_charset_ocs(seq->terminator,
432                                                 VTE_SEQ_REMOVE_INTERMEDIATE(intermediates));
433                 return VTE_CMD_DOCS;
434 
435         case VTE_SEQ_INTERMEDIATE_AND:     /* Identify revised registration */
436                 if (VTE_SEQ_REMOVE_INTERMEDIATE(intermediates) == 0)
437                         return VTE_CMD_IRR;
438                 break;
439 
440         case VTE_SEQ_INTERMEDIATE_SQUOTE:  /* Reserved for future standardisation */
441                 break;
442 
443         case VTE_SEQ_INTERMEDIATE_POPEN:   /* G0-designate 94-set */
444         case VTE_SEQ_INTERMEDIATE_PCLOSE:  /* G1-designate 94-set */
445         case VTE_SEQ_INTERMEDIATE_MULT:    /* G2-designate 94-set */
446         case VTE_SEQ_INTERMEDIATE_PLUS:    /* G3-designate 94-set */
447                 *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_94(seq->terminator,
448                                                                 VTE_SEQ_REMOVE_INTERMEDIATE(intermediates)),
449                                            intermediate0 - VTE_SEQ_INTERMEDIATE_POPEN);
450                 return VTE_CMD_GnDm;
451 
452         case VTE_SEQ_INTERMEDIATE_COMMA:   /* Reserved for future standardisation */
453                 break;
454 
455         case VTE_SEQ_INTERMEDIATE_MINUS:   /* G1-designate 96-set */
456         case VTE_SEQ_INTERMEDIATE_DOT:     /* G2-designate 96-set */
457         case VTE_SEQ_INTERMEDIATE_SLASH:   /* G3-designate 96-set */
458                 *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_96(seq->terminator,
459                                                                 VTE_SEQ_REMOVE_INTERMEDIATE(intermediates)),
460                                            intermediate0 - VTE_SEQ_INTERMEDIATE_COMMA);
461                 return VTE_CMD_GnDm;
462         }
463 
464         return VTE_CMD_NONE;
465 }
466 
467 static unsigned int
vte_parse_host_csi(vte_seq_t const * seq)468 vte_parse_host_csi(vte_seq_t const* seq)
469 {
470         switch (_VTE_SEQ_CODE(seq->terminator, seq->intermediates)) {
471 #define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
472                 case _VTE_SEQ_CODE(f, _VTE_SEQ_CODE_COMBINE(VTE_SEQ_PARAMETER_##p, VTE_SEQ_INTERMEDIATE_##i)): return VTE_CMD_##cmd;
473 #include "parser-csi.hh"
474 #undef _VTE_SEQ
475         default: return VTE_CMD_NONE;
476         }
477 }
478 
479 static unsigned int
vte_parse_host_dcs(vte_seq_t const * seq,unsigned int * flagsptr)480 vte_parse_host_dcs(vte_seq_t const* seq,
481                    unsigned int* flagsptr)
482 {
483         switch (_VTE_SEQ_CODE(seq->terminator, seq->intermediates)) {
484 #define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
485                 case _VTE_SEQ_CODE(f, _VTE_SEQ_CODE_COMBINE(VTE_SEQ_PARAMETER_##p, VTE_SEQ_INTERMEDIATE_##i)): *flagsptr = flags; return VTE_CMD_##cmd;
486 #include "parser-dcs.hh"
487 #undef _VTE_SEQ
488         default: return VTE_CMD_NONE;
489         }
490 }
491 
492 static unsigned int
vte_parse_host_sci(vte_seq_t const * seq)493 vte_parse_host_sci(vte_seq_t const* seq)
494 {
495         switch (_VTE_SEQ_CODE(seq->terminator, 0)) {
496 #define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
497                 case _VTE_SEQ_CODE(f, 0): return VTE_CMD_##cmd;
498 #include "parser-sci.hh"
499 #undef _VTE_SEQ
500         default: return VTE_CMD_NONE;
501         }
502 }
503 
504 /*
505  * State Machine
506  * This parser controls the parser-state and returns any detected sequence to
507  * the caller. The parser is based on this state-diagram from Paul Williams:
508  *   https://vt100.net/emu/
509  * It was written from scratch and extended where needed.
510  * This parser is fully compatible up to the vt500 series. We expect UCS-4 as
 * input. It's the caller's responsibility to do any UTF-8 parsing.
512  */
513 
514 enum parser_state_t {
515         STATE_GROUND,           /* initial state and ground */
516         STATE_DCS_PASS_ESC,     /* ESC after DCS which may be ESC \ aka C0 ST */
517         STATE_OSC_STRING_ESC,   /* ESC after OSC which may be ESC \ aka C0 ST */
518         STATE_ESC,              /* ESC sequence was started */
519         STATE_ESC_INT,          /* intermediate escape characters */
520         STATE_CSI_ENTRY,        /* starting CSI sequence */
521         STATE_CSI_PARAM,        /* CSI parameters */
522         STATE_CSI_INT,          /* intermediate CSI characters */
523         STATE_CSI_IGNORE,       /* CSI error; ignore this CSI sequence */
524         STATE_DCS_ENTRY,        /* starting DCS sequence */
525         STATE_DCS_PARAM,        /* DCS parameters */
526         STATE_DCS_INT,          /* intermediate DCS characters */
527         STATE_DCS_PASS,         /* DCS data passthrough */
528         STATE_DCS_IGNORE,       /* DCS error; ignore this DCS sequence */
529         STATE_OSC_STRING,       /* parsing OSC sequence */
530         STATE_ST_IGNORE,        /* unimplemented seq; ignore until ST */
531         STATE_SCI,              /* single character introducer sequence was started */
532 
533         STATE_N,
534 };
535 
536 /* Parser state transitioning */
537 
538 typedef int (* parser_action_func)(vte_parser_t* parser, uint32_t raw);
539 
540 // FIXMEchpe: I get weird performance results here from
541 // either not inlining, inlining these function or the
542 // macros below. Sometimes (after a recompile) one is
543 // (as much as 50%!) slower, sometimes the other one etc. ‽
544 
545 #if 1 // (inline) functions
546 
547 // #define PTINLINE inline
548 #define PTINLINE
549 
550 /* nop */
551 static PTINLINE int
parser_nop(vte_parser_t * parser,uint32_t raw)552 parser_nop(vte_parser_t* parser,
553            uint32_t raw)
554 {
555         return VTE_SEQ_NONE;
556 }
557 /* dispatch related actions */
558 static PTINLINE int
parser_action(vte_parser_t * parser,uint32_t raw,parser_action_func action)559 parser_action(vte_parser_t* parser,
560               uint32_t raw,
561               parser_action_func action)
562 {
563         return action(parser, raw);
564 }
565 
566 /* perform state transition */
567 static PTINLINE int
parser_transition_no_action(vte_parser_t * parser,uint32_t raw,unsigned int state)568 parser_transition_no_action(vte_parser_t* parser,
569                             uint32_t raw,
570                             unsigned int state)
571 {
572         parser->state = state;
573         return VTE_SEQ_NONE;
574 }
575 
576 /* perform state transition and dispatch related actions */
577 static PTINLINE int
parser_transition(vte_parser_t * parser,uint32_t raw,unsigned int state,parser_action_func action)578 parser_transition(vte_parser_t* parser,
579                   uint32_t raw,
580                   unsigned int state,
581                   parser_action_func action)
582 {
583         parser->state = state;
584 
585         return action(parser, raw);
586 }
587 
588 #undef PTINLINE
589 
590 #else // macros
591 
592 /* nop */
593 #define parser_nop(parser,raw) \
594         ({ VTE_SEQ_NONE; })
595 
596 /* dispatch related actions */
597 #define parser_action(p,r,a) \
598         ({ \
599                 a((p), (r)); \
600         })
601 
602 /* perform state transition */
603 #define parser_transition_no_action(p,r,s) \
604         ({ \
605                 parser->state = s; \
606                 VTE_SEQ_NONE; \
607         })
608 
609 /* perform state transition and dispatch related actions */
610 #define parser_transition(p,r,s,a) \
611         ({ \
612                 (p)->state = s; \
613                 a((p), (r)); \
614         })
615 
616 #endif // (inline) functions or macros
617 
618 /**
619  * vte_parser_init() - Initialise parser object
620  * @parser: the struct vte_parser
621  */
622 void
vte_parser_init(vte_parser_t * parser)623 vte_parser_init(vte_parser_t* parser)
624 {
625         memset(parser, 0, sizeof(*parser));
626         vte_seq_string_init(&parser->seq.arg_str);
627 }
628 
629 /**
630  * vte_parser_deinit() - Deinitialises parser object
631  * @parser: parser object to deinitialise
632  */
633 void
vte_parser_deinit(vte_parser_t * parser)634 vte_parser_deinit(vte_parser_t* parser)
635 {
636         vte_seq_string_free(&parser->seq.arg_str);
637 }
638 
639 static inline int
parser_clear(vte_parser_t * parser,uint32_t raw)640 parser_clear(vte_parser_t* parser,
641              uint32_t raw)
642 {
643         /* seq.command is set when the sequence is executed,
644          * seq.terminator is set when the final character is received,
645          * and seq.introducer is set when the introducer is received,
646          * and all this happens before the sequence is dispatched.
647          * Therefore these fiedls need not be cleared in any case.
648          */
649         return VTE_SEQ_NONE;
650 }
651 
652 static inline int
parser_clear_int(vte_parser_t * parser,uint32_t raw)653 parser_clear_int(vte_parser_t* parser,
654                  uint32_t raw)
655 {
656         parser->seq.intermediates = 0;
657         parser->seq.n_intermediates = 0;
658 
659         return parser_clear(parser, raw);
660 }
661 
662 static inline int
parser_clear_params(vte_parser_t * parser,uint32_t raw)663 parser_clear_params(vte_parser_t* parser,
664                     uint32_t raw)
665 {
666         /* The (n_args+1)th parameter may have been started but not
667          * finialised, so it needs cleaning too. All further params
668          * have not been touched, so need not be cleaned.
669          */
670         unsigned int n_args = G_UNLIKELY(parser->seq.n_args >= VTE_PARSER_ARG_MAX)
671                 ? VTE_PARSER_ARG_MAX
672                 : parser->seq.n_args + 1;
673         memset(parser->seq.args, 0, n_args * sizeof(parser->seq.args[0]));
674 #ifdef PARSER_EXTRA_CLEAN
675         /* Assert that the assumed-clean params are actually clean. */
676         for (unsigned int n = n_args; n < VTE_PARSER_ARG_MAX; ++n)
677                 g_assert_cmpuint(parser->seq.args[n], ==, VTE_SEQ_ARG_INIT_DEFAULT);
678 #endif
679 
680         parser->seq.n_args = 0;
681         parser->seq.n_final_args = 0;
682 
683         return VTE_SEQ_NONE;
684 }
685 
686 static inline int
parser_clear_int_and_params(vte_parser_t * parser,uint32_t raw)687 parser_clear_int_and_params(vte_parser_t* parser,
688                             uint32_t raw)
689 {
690         parser_clear_int(parser, raw);
691         return parser_clear_params(parser, raw);
692 }
693 
694 static int
parser_ignore(vte_parser_t * parser,uint32_t raw)695 parser_ignore(vte_parser_t* parser,
696               uint32_t raw)
697 {
698         parser->seq.type = VTE_SEQ_IGNORE;
699         parser->seq.command = VTE_CMD_NONE;
700         parser->seq.terminator = raw;
701 
702         return parser->seq.type;
703 }
704 
705 static int
parser_print(vte_parser_t * parser,uint32_t raw)706 parser_print(vte_parser_t* parser,
707              uint32_t raw)
708 {
709         parser->seq.type = VTE_SEQ_GRAPHIC;
710         parser->seq.command = VTE_CMD_GRAPHIC;
711         parser->seq.terminator = raw;
712 
713         return parser->seq.type;
714 }
715 
716 static int
parser_execute(vte_parser_t * parser,uint32_t raw)717 parser_execute(vte_parser_t* parser,
718                uint32_t raw)
719 {
720         parser->seq.type = VTE_SEQ_CONTROL;
721         parser->seq.terminator = raw;
722         parser->seq.command = vte_parse_host_control(&parser->seq);
723 
724         return parser->seq.type;
725 }
726 
727 static int
parser_collect_esc(vte_parser_t * parser,uint32_t raw)728 parser_collect_esc(vte_parser_t* parser,
729                    uint32_t raw)
730 {
731         assert(raw >= 0x20 && raw <= 0x2f);
732 
733         /* ESCAPE sequences only have intermediates or 2/0..2/15, so there's no
734          * need for the extra shift as below for CSI/DCS sequences
735          */
736         parser->seq.intermediates |= (VTE_SEQ_MAKE_INTERMEDIATE(raw) << (VTE_SEQ_INTERMEDIATE_BITS * parser->seq.n_intermediates++));
737 
738         return VTE_SEQ_NONE;
739 }
740 
741 static int
parser_collect_csi(vte_parser_t * parser,uint32_t raw)742 parser_collect_csi(vte_parser_t* parser,
743                    uint32_t raw)
744 {
745         assert(raw >= 0x20 && raw <= 0x2f);
746 
747         /* In addition to 2/0..2/15 intermediates, CSI/DCS sequence
748          * can also have one parameter byte 3/12..3/15 at the
749          * start of the parameters (see parser_collect_parameter below);
750          * that's what the extra shift is for.
751          */
752         parser->seq.intermediates |= (VTE_SEQ_MAKE_INTERMEDIATE(raw) << (VTE_SEQ_PARAMETER_BITS +
753                                                                          VTE_SEQ_INTERMEDIATE_BITS * parser->seq.n_intermediates++));
754 
755         return VTE_SEQ_NONE;
756 }
757 
758 static int
parser_collect_parameter(vte_parser_t * parser,uint32_t raw)759 parser_collect_parameter(vte_parser_t* parser,
760                          uint32_t raw)
761 {
762         assert(raw >= 0x3c && raw <= 0x3f);
763 
764         /* CSI/DCS may optionally have one parameter byte from 3/12..3/15
765          * at the start of the parameters; we put that into the lowest
766          * part of @seq.intermediates.
767          * Note that there can only be *one* such byte; the state machine
768          * already enforces that, so we do not need any additional checks
769          * here.
770          */
771         parser->seq.intermediates |= VTE_SEQ_MAKE_PARAMETER(raw);
772 
773         return VTE_SEQ_NONE;
774 }
775 
776 static void
parser_params_overflow(vte_parser_t * parser,uint32_t raw)777 parser_params_overflow(vte_parser_t* parser,
778                        uint32_t raw)
779 {
780         /* An overflow of the parameter number can only happen in
781          * STATE_{CSI,DCS}_PARAM, and it occurs when
782          * seq.n_arg == VTE_PARSER_ARG_MAX, and either an 0…9
783          * is encountered, starting the next param, or an
784          * explicit ':' or ';' terminating a (defaulted) (sub)param,
785          * or when the intermediates/final character(s) occur
786          * after a defaulted (sub)param.
787          *
788          * Transition to STATE_{CSI,DCS}_IGNORE to ignore the
789          * whole sequence.
790          */
791         parser_transition_no_action(parser,
792                                     raw,
793                                     parser->state == STATE_CSI_PARAM ?
794                                     STATE_CSI_IGNORE : STATE_DCS_IGNORE);
795 }
796 
/* The next two functions are only called when encountering a ';' or ':'.
 * If there are already MAX-1 parameters, the ';' or ':' would finish
 * the MAXth parameter, and a default or non-default (MAX+1)th parameter
 * would follow it.
 */
802 static int
parser_finish_param(vte_parser_t * parser,uint32_t raw)803 parser_finish_param(vte_parser_t* parser,
804                     uint32_t raw)
805 {
806         if (G_LIKELY(parser->seq.n_args < VTE_PARSER_ARG_MAX - 1)) {
807                 vte_seq_arg_finish(&parser->seq.args[parser->seq.n_args], false);
808                 ++parser->seq.n_args;
809                 ++parser->seq.n_final_args;
810         } else
811                 parser_params_overflow(parser, raw);
812 
813         return VTE_SEQ_NONE;
814 }
815 
816 static int
parser_finish_subparam(vte_parser_t * parser,uint32_t raw)817 parser_finish_subparam(vte_parser_t* parser,
818                        uint32_t raw)
819 {
820         if (G_LIKELY(parser->seq.n_args < VTE_PARSER_ARG_MAX - 1)) {
821                 vte_seq_arg_finish(&parser->seq.args[parser->seq.n_args], true);
822                 ++parser->seq.n_args;
823         } else
824                 parser_params_overflow(parser, raw);
825 
826         return VTE_SEQ_NONE;
827 }
828 
829 static int
parser_param(vte_parser_t * parser,uint32_t raw)830 parser_param(vte_parser_t* parser,
831              uint32_t raw)
832 {
833         /* assert(raw >= '0' && raw <= '9'); */
834 
835         if (G_LIKELY(parser->seq.n_args < VTE_PARSER_ARG_MAX))
836                 vte_seq_arg_push(&parser->seq.args[parser->seq.n_args], raw);
837         else
838                 parser_params_overflow(parser, raw);
839 
840         return VTE_SEQ_NONE;
841 }
842 
843 static inline int
parser_osc_start(vte_parser_t * parser,uint32_t raw)844 parser_osc_start(vte_parser_t* parser,
845                  uint32_t raw)
846 {
847         parser_clear(parser, raw);
848 
849         vte_seq_string_reset(&parser->seq.arg_str);
850 
851         parser->seq.introducer = raw;
852         return VTE_SEQ_NONE;
853 }
854 
855 static int
parser_osc_collect(vte_parser_t * parser,uint32_t raw)856 parser_osc_collect(vte_parser_t* parser,
857                    uint32_t raw)
858 {
859         /*
860          * Only characters from 0x20..0x7e and >= 0xa0 are allowed here.
861          * Our state-machine already verifies those restrictions.
862          */
863 
864         if (G_UNLIKELY(!vte_seq_string_push(&parser->seq.arg_str, raw)))
865                 parser->state = STATE_ST_IGNORE;
866 
867         return VTE_SEQ_NONE;
868 }
869 
870 static int
parser_dcs_start(vte_parser_t * parser,uint32_t raw)871 parser_dcs_start(vte_parser_t* parser,
872                  uint32_t raw)
873 {
874         parser_clear_int_and_params(parser, raw);
875 
876         vte_seq_string_reset(&parser->seq.arg_str);
877 
878         parser->seq.introducer = raw;
879         return VTE_SEQ_NONE;
880 }
881 
882 static int
parser_dcs_consume(vte_parser_t * parser,uint32_t raw)883 parser_dcs_consume(vte_parser_t* parser,
884                    uint32_t raw)
885 {
886         /* parser->seq is cleared during DCS-START state, thus there's no need
887          * to clear invalid fields here. */
888 
889         if (G_LIKELY(parser->seq.n_args < VTE_PARSER_ARG_MAX)) {
890                 if (parser->seq.n_args > 0 ||
891                     vte_seq_arg_started(parser->seq.args[parser->seq.n_args])) {
892                         vte_seq_arg_finish(&parser->seq.args[parser->seq.n_args], false);
893                         ++parser->seq.n_args;
894                         ++parser->seq.n_final_args;
895                 }
896         }
897 
898         parser->seq.type = VTE_SEQ_DCS;
899         parser->seq.terminator = raw;
900         parser->seq.st = 0;
901 
902         auto flags = unsigned{};
903         parser->seq.command = vte_parse_host_dcs(&parser->seq, &flags);
904 
905         return (flags & VTE_DISPATCH_UNRIPE) && parser->dispatch_unripe ? VTE_SEQ_DCS : VTE_SEQ_NONE;
906 }
907 
908 static int
parser_dcs_collect(vte_parser_t * parser,uint32_t raw)909 parser_dcs_collect(vte_parser_t* parser,
910                    uint32_t raw)
911 {
912         if (G_UNLIKELY(!vte_seq_string_push(&parser->seq.arg_str, raw)))
913                 parser->state = STATE_DCS_IGNORE;
914 
915         return VTE_SEQ_NONE;
916 }
917 
918 static int
parser_esc(vte_parser_t * parser,uint32_t raw)919 parser_esc(vte_parser_t* parser,
920            uint32_t raw)
921 {
922         parser->seq.type = VTE_SEQ_ESCAPE;
923         parser->seq.terminator = raw;
924         parser->seq.charset = VTE_CHARSET_NONE;
925         parser->seq.command = vte_parse_host_escape(&parser->seq,
926                                                     &parser->seq.charset);
927 
928         return parser->seq.type;
929 }
930 
931 static int
parser_csi(vte_parser_t * parser,uint32_t raw)932 parser_csi(vte_parser_t* parser,
933            uint32_t raw)
934 {
935         /* parser->seq is cleared during CSI-ENTER state, thus there's no need
936          * to clear invalid fields here. */
937 
938         if (G_LIKELY(parser->seq.n_args < VTE_PARSER_ARG_MAX)) {
939                 if (parser->seq.n_args > 0 ||
940                     vte_seq_arg_started(parser->seq.args[parser->seq.n_args])) {
941                         vte_seq_arg_finish(&parser->seq.args[parser->seq.n_args], false);
942                         ++parser->seq.n_args;
943                         ++parser->seq.n_final_args;
944                 }
945         }
946 
947         parser->seq.type = VTE_SEQ_CSI;
948         parser->seq.terminator = raw;
949         parser->seq.command = vte_parse_host_csi(&parser->seq);
950 
951         return parser->seq.type;
952 }
953 
954 static int
parser_osc(vte_parser_t * parser,uint32_t raw)955 parser_osc(vte_parser_t* parser,
956            uint32_t raw)
957 {
958         /* parser->seq is cleared during OSC_START state, thus there's no need
959          * to clear invalid fields here. */
960 
961         vte_seq_string_finish(&parser->seq.arg_str);
962 
963         /* We only dispatch a DCS if the introducer and string
964          * terminator are from the same control set, i.e. both
965          * C0 or both C1; we discard sequences with mixed controls.
966          */
967         if (!parser_check_matching_controls(parser->seq.introducer, raw))
968                 return VTE_SEQ_IGNORE;
969 
970         parser->seq.type = VTE_SEQ_OSC;
971         parser->seq.command = VTE_CMD_OSC;
972         parser->seq.st = raw;
973 
974         return parser->seq.type;
975 }
976 
977 static int
parser_dcs(vte_parser_t * parser,uint32_t raw)978 parser_dcs(vte_parser_t* parser,
979            uint32_t raw)
980 {
981         /* Most of parser->seq was already filled in parser_dcs_consume() */
982         parser->seq.st = raw;
983 
984         vte_seq_string_finish(&parser->seq.arg_str);
985 
986         /* We only dispatch a DCS if the introducer and string
987          * terminator are from the same control set, i.e. both
988          * C0 or both C1; we discard sequences with mixed controls.
989          */
990         if (!parser_check_matching_controls(parser->seq.introducer, raw))
991                 return VTE_SEQ_IGNORE;
992 
993         return parser->seq.type;
994 }
995 
996 static int
parser_sci(vte_parser_t * parser,uint32_t raw)997 parser_sci(vte_parser_t* parser,
998            uint32_t raw)
999 {
1000         parser->seq.type = VTE_SEQ_SCI;
1001         parser->seq.terminator = raw;
1002         parser->seq.command = vte_parse_host_sci(&parser->seq);
1003 
1004         return parser->seq.type;
1005 }
1006 
/* Action names used by the state machine in parser_feed_to_state().
 *
 * Each ACTION_* macro expands to the name of the parser_*() handler
 * that is handed to parser_action() / parser_transition() for that
 * action.  ACTION_COLLECT_DCS is an alias of ACTION_COLLECT_CSI, so the
 * intermediates of DCS sequences are collected by parser_collect_csi().
 */
#define ACTION_CLEAR parser_clear
#define ACTION_CLEAR_INT parser_clear_int
#define ACTION_CLEAR_INT_AND_PARAMS parser_clear_int_and_params
#define ACTION_CLEAR_PARAMS_ONLY parser_clear_params
#define ACTION_IGNORE parser_ignore
#define ACTION_PRINT parser_print
#define ACTION_EXECUTE parser_execute
#define ACTION_COLLECT_ESC parser_collect_esc
#define ACTION_COLLECT_CSI parser_collect_csi
#define ACTION_COLLECT_DCS ACTION_COLLECT_CSI
#define ACTION_COLLECT_PARAMETER parser_collect_parameter
#define ACTION_PARAM parser_param
#define ACTION_FINISH_PARAM parser_finish_param
#define ACTION_FINISH_SUBPARAM parser_finish_subparam
#define ACTION_ESC_DISPATCH parser_esc
#define ACTION_CSI_DISPATCH parser_csi
#define ACTION_DCS_START parser_dcs_start
#define ACTION_DCS_CONSUME parser_dcs_consume
#define ACTION_DCS_COLLECT parser_dcs_collect
#define ACTION_DCS_DISPATCH parser_dcs
#define ACTION_OSC_START parser_osc_start
#define ACTION_OSC_COLLECT parser_osc_collect
#define ACTION_OSC_DISPATCH parser_osc
#define ACTION_SCI_DISPATCH parser_sci
1031 
/* Core of the state machine: process one input character.
 *
 * Depending on @parser->state and @raw, this either
 *  - runs an action while staying in the current state (parser_action),
 *  - switches to another state and runs an action (parser_transition),
 *  - switches to another state without an action
 *    (parser_transition_no_action), or
 *  - does nothing (parser_nop).
 *
 * Returns the value of the selected helper.  The action handlers
 * defined above return a VTE_SEQ_* code: VTE_SEQ_NONE while a sequence
 * is still being collected, or the type of the completed sequence.
 *
 * In every state, characters not matched by an explicit case label are
 * handled by the statement following the inner switch.
 *
 * If @parser->state is none of the states handled below,
 * g_assert_not_reached() is hit.
 *
 * Note that the `case A ... B:` labels are the GNU case-range extension.
 */
static int
parser_feed_to_state(vte_parser_t* parser,
                     uint32_t raw)
{
        switch (parser->state) {
        /* Ground state: controls are executed, ESC starts a sequence,
         * and everything else is printed.
         */
        case STATE_GROUND:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                case 0x80 ... 0x9f:        /* C1 */
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                }

                return parser_action(parser, raw,
                                     ACTION_PRINT);

        /* An ESC was seen inside a DCS data string or an OSC string.
         * These states are entered without an action, so the string
         * collected so far is still intact: ESC '\' dispatches the
         * string, just like ST (0x9c) does in the string states.
         * Any other character is handled as if it followed a plain ESC.
         */
        case STATE_DCS_PASS_ESC:
        case STATE_OSC_STRING_ESC:
                if (raw == 0x5c /* '\' */) {
                        switch (parser->state) {
                        case STATE_DCS_PASS_ESC:
                                return parser_transition(parser, raw, STATE_GROUND,
                                                         ACTION_DCS_DISPATCH);
                        case STATE_OSC_STRING_ESC:
                                return parser_transition(parser, raw, STATE_GROUND,
                                                         ACTION_OSC_DISPATCH);
                        }
                }

                /* Do the deferred clear and fallthrough to STATE_ESC.
                 * The clear is run as if for the ESC (0x1b) seen earlier;
                 * its return value is not used.
                 */
                parser_transition(parser, 0x1b /* ESC */, STATE_ESC,
                                  ACTION_CLEAR_INT);

                [[fallthrough]];
        /* After ESC: the next character selects the kind of sequence. */
        case STATE_ESC:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                                 ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_transition(parser, raw, STATE_ESC_INT,
                                                 ACTION_COLLECT_ESC);
                case 0x30 ... 0x4f:        /* ['0' - '~'] except */
                case 0x51 ... 0x57:        /* { 'P', 'X', 'Z', '[', ']', '^', '_' } */
                case 0x59:
                case 0x5c:
                case 0x60 ... 0x7e:
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_ESC_DISPATCH);
                case 0x50:                /* 'P' */
                        return parser_transition(parser, raw, STATE_DCS_ENTRY,
                                                 ACTION_DCS_START);
                case 0x5a:                /* 'Z' */
                        return parser_transition(parser, raw, STATE_SCI,
                                                 ACTION_CLEAR);
                case 0x5b:                /* '[' */
                        return parser_transition(parser, raw, STATE_CSI_ENTRY,
                                                 ACTION_CLEAR_PARAMS_ONLY
                                                 /* rest already cleaned on ESC state entry */);
                case 0x5d:                /* ']' */
                        return parser_transition(parser, raw, STATE_OSC_STRING,
                                                 ACTION_OSC_START);
                case 0x58:                /* 'X' */
                case 0x5e:                /* '^' */
                case 0x5f:                /* '_' */
                        /* These strings are skipped up to their terminator */
                        return parser_transition_no_action(parser, raw, STATE_ST_IGNORE);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition(parser, raw, STATE_GROUND,
                                         ACTION_IGNORE);
        /* Escape sequence with at least one intermediate collected. */
        case STATE_ESC_INT:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_action(parser, raw,
                                             ACTION_COLLECT_ESC);
                case 0x30 ... 0x7e:        /* ['0' - '~'] */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_ESC_DISPATCH);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition(parser, raw, STATE_GROUND,
                                         ACTION_IGNORE);
        /* Directly after the CSI introducer.  This is the only CSI state
         * in which a parameter byte from '<'..'?' is collected.
         */
        case STATE_CSI_ENTRY:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_transition(parser, raw, STATE_CSI_INT,
                                                 ACTION_COLLECT_CSI);
                case 0x30 ... 0x39:        /* ['0' - '9'] */
                        return parser_transition(parser, raw, STATE_CSI_PARAM,
                                                 ACTION_PARAM);
                case 0x3a:                 /* ':' */
                        return parser_transition(parser, raw, STATE_CSI_PARAM,
                                                 ACTION_FINISH_SUBPARAM);
                case 0x3b:                 /* ';' */
                        return parser_transition(parser, raw, STATE_CSI_PARAM,
                                                 ACTION_FINISH_PARAM);
                case 0x3c ... 0x3f:        /* ['<' - '?'] */
                        return parser_transition(parser, raw, STATE_CSI_PARAM,
                                                 ACTION_COLLECT_PARAMETER);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_CSI_DISPATCH);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition_no_action(parser, raw, STATE_CSI_IGNORE);
        /* Collecting CSI parameters.  A parameter byte from '<'..'?' is
         * not accepted any more and makes the whole sequence ignored.
         */
        case STATE_CSI_PARAM:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_transition(parser, raw, STATE_CSI_INT,
                                                 ACTION_COLLECT_CSI);
                case 0x30 ... 0x39:        /* ['0' - '9'] */
                        return parser_action(parser, raw,
                                             ACTION_PARAM);
                case 0x3a:                 /* ':' */
                        return parser_action(parser, raw,
                                             ACTION_FINISH_SUBPARAM);
                case 0x3b:                 /* ';' */
                        return parser_action(parser, raw,
                                             ACTION_FINISH_PARAM);
                case 0x3c ... 0x3f:        /* ['<' - '?'] */
                        return parser_transition_no_action(parser, raw, STATE_CSI_IGNORE);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_CSI_DISPATCH);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition_no_action(parser, raw, STATE_CSI_IGNORE);
        /* Collecting CSI intermediates.  Parameters are not accepted
         * after an intermediate and make the whole sequence ignored.
         */
        case STATE_CSI_INT:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_action(parser, raw,
                                             ACTION_COLLECT_CSI);
                case 0x30 ... 0x3f:        /* ['0' - '?'] */
                        return parser_transition_no_action(parser, raw, STATE_CSI_IGNORE);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_CSI_DISPATCH);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition_no_action(parser, raw, STATE_CSI_IGNORE);
        /* Ignoring a CSI sequence: C0 controls are still executed, and
         * the final character returns to ground without dispatching.
         */
        case STATE_CSI_IGNORE:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x3f:        /* [' ' - '?'] */
                        return parser_nop(parser, raw);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition_no_action(parser, raw, STATE_GROUND);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_nop(parser, raw);
        /* Directly after the DCS introducer.  Mirrors STATE_CSI_ENTRY,
         * except that C0 controls get ACTION_IGNORE instead of
         * ACTION_EXECUTE, and that the final character (as well as any
         * character not listed) starts the data string instead of
         * dispatching.
         */
        case STATE_DCS_ENTRY:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_IGNORE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_transition(parser, raw, STATE_DCS_INT,
                                                 ACTION_COLLECT_DCS);
                case 0x30 ... 0x39:        /* ['0' - '9'] */
                        return parser_transition(parser, raw, STATE_DCS_PARAM,
                                                 ACTION_PARAM);
                case 0x3a:                 /* ':' */
                        return parser_transition(parser, raw, STATE_DCS_PARAM,
                                                 ACTION_FINISH_SUBPARAM);
                case 0x3b:                 /* ';' */
                        return parser_transition(parser, raw, STATE_DCS_PARAM,
                                                 ACTION_FINISH_PARAM);
                case 0x3c ... 0x3f:        /* ['<' - '?'] */
                        return parser_transition(parser, raw, STATE_DCS_PARAM,
                                                 ACTION_COLLECT_PARAMETER);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_DCS_PASS,
                                                 ACTION_DCS_CONSUME);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition(parser, raw,
                                         STATE_DCS_PASS, ACTION_DCS_CONSUME);
        /* Collecting DCS parameters; mirrors STATE_CSI_PARAM with the
         * same differences as described for STATE_DCS_ENTRY.
         */
        case STATE_DCS_PARAM:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_IGNORE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_transition(parser, raw, STATE_DCS_INT,
                                                 ACTION_COLLECT_DCS);
                case 0x30 ... 0x39:        /* ['0' - '9'] */
                        return parser_action(parser, raw,
                                                 ACTION_PARAM);
                case 0x3a:                 /* ':' */
                        return parser_action(parser, raw,
                                             ACTION_FINISH_SUBPARAM);
                case 0x3b:                 /* ';' */
                        return parser_action(parser, raw,
                                             ACTION_FINISH_PARAM);
                case 0x3c ... 0x3f:        /* ['<' - '?'] */
                        return parser_transition_no_action(parser, raw, STATE_DCS_IGNORE);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_DCS_PASS,
                                                 ACTION_DCS_CONSUME);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition(parser, raw,
                                         STATE_DCS_PASS, ACTION_DCS_CONSUME);
        /* Collecting DCS intermediates; mirrors STATE_CSI_INT with the
         * same differences as described for STATE_DCS_ENTRY.
         */
        case STATE_DCS_INT:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_IGNORE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_action(parser, raw,
                                             ACTION_COLLECT_DCS);
                case 0x30 ... 0x3f:        /* ['0' - '?'] */
                        return parser_transition_no_action(parser, raw, STATE_DCS_IGNORE);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_DCS_PASS,
                                                 ACTION_DCS_CONSUME);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition(parser, raw,
                                         STATE_DCS_PASS, ACTION_DCS_CONSUME);
        /* Inside the DCS data string: everything except ESC and ST is
         * collected, including C0 controls.  ESC defers the decision to
         * STATE_DCS_PASS_ESC.
         */
        case STATE_DCS_PASS:
                switch (raw) {
                case 0x00 ... 0x1a:        /* ASCII \ { ESC } */
                case 0x1c ... 0x7f:
                        return parser_action(parser, raw,
                                             ACTION_DCS_COLLECT);
                case 0x1b:                /* ESC */
                        return parser_transition_no_action(parser, raw, STATE_DCS_PASS_ESC);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_DCS_DISPATCH);
                }

                return parser_action(parser, raw,
                                     ACTION_DCS_COLLECT);
        /* Ignoring a DCS: everything is dropped until ESC or ST. */
        case STATE_DCS_IGNORE:
                switch (raw) {
                case 0x00 ... 0x1a:        /* ASCII \ { ESC } */
                case 0x1c ... 0x7f:
                        return parser_nop(parser, raw);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x9c:                /* ST */
                        return parser_transition_no_action(parser, raw, STATE_GROUND);
                }

                return parser_nop(parser, raw);
        /* Inside an OSC string: C0 controls other than BEL and ESC are
         * dropped, BEL and ST dispatch the string, and ESC defers the
         * decision to STATE_OSC_STRING_ESC.  Everything else is collected.
         */
        case STATE_OSC_STRING:
                switch (raw) {
                case 0x00 ... 0x06:        /* C0 \ { BEL, ESC } */
                case 0x08 ... 0x1a:
                case 0x1c ... 0x1f:
                        return parser_nop(parser, raw);
                case 0x1b:                /* ESC */
                        return parser_transition_no_action(parser, raw, STATE_OSC_STRING_ESC);
                case 0x20 ... 0x7f:        /* [' ' - DEL] */
                        return parser_action(parser, raw,
                                             ACTION_OSC_COLLECT);
                case 0x07:                /* BEL */
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_OSC_DISPATCH);
                }

                return parser_action(parser, raw,
                                     ACTION_OSC_COLLECT);
        /* Skipping a string up to its terminator: everything is dropped
         * until ESC or ST.
         */
        case STATE_ST_IGNORE:
                switch (raw) {
                case 0x00 ... 0x1a:        /* ASCII \ { ESC } */
                case 0x1c ... 0x7f:
                        return parser_nop(parser, raw);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw,
                                                 STATE_GROUND, ACTION_IGNORE);
                }

                return parser_nop(parser, raw);
        /* After SCI: exactly one more character completes the sequence;
         * characters outside the accepted ranges end it with
         * ACTION_IGNORE.
         */
        case STATE_SCI:
                switch (raw) {
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw,
                                                 STATE_ESC, ACTION_CLEAR_INT);
                case 0x08 ... 0x0d:        /* BS, HT, LF, VT, FF, CR */
                case 0x20 ... 0x7e:        /* [' ' - '~'] */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_SCI_DISPATCH);
                }

                return parser_transition(parser, raw, STATE_GROUND,
                                         ACTION_IGNORE);
        }

        /* Every state handled above returns; getting here means that
         * @parser->state holds a value not handled by this function.
         */
        g_assert_not_reached();
        return VTE_SEQ_NONE;
}
1412 
1413 int
vte_parser_feed(vte_parser_t * parser,uint32_t raw)1414 vte_parser_feed(vte_parser_t* parser,
1415                 uint32_t raw)
1416 {
1417         /*
1418          * Notes:
1419          *  * DEC treats GR codes as GL. We don't do that as we require UTF-8
1420          *    as charset and, thus, it doesn't make sense to treat GR special.
1421          *  * During control sequences, unexpected C1 codes cancel the sequence
1422          *    and immediately start a new one. C0 codes, however, may or may not
1423          *    be ignored/executed depending on the sequence.
1424          */
1425 
1426         switch (raw) {
1427         case 0x18:                /* CAN */
1428                 return parser_transition(parser, raw,
1429                                          STATE_GROUND, ACTION_IGNORE);
1430         case 0x1a:                /* SUB */
1431                 return parser_transition(parser, raw,
1432                                          STATE_GROUND, ACTION_EXECUTE);
1433         case 0x7f:                 /* DEL */
1434                 return parser_nop(parser, raw);
1435         case 0x80 ... 0x8f:        /* C1 \ {DCS, SOS, SCI, CSI, ST, OSC, PM, APC} */
1436         case 0x91 ... 0x97:
1437         case 0x99:
1438                 return parser_transition(parser, raw,
1439                                          STATE_GROUND, ACTION_EXECUTE);
1440         case 0x98:                /* SOS */
1441         case 0x9e:                /* PM */
1442         case 0x9f:                /* APC */
1443                 return parser_transition_no_action(parser, raw, STATE_ST_IGNORE);
1444                 // FIXMEchpe shouldn't this use ACTION_CLEAR?
1445         case 0x90:                /* DCS */
1446                 return parser_transition(parser, raw,
1447                                          STATE_DCS_ENTRY, ACTION_DCS_START);
1448         case 0x9a:                /* SCI */
1449                 return parser_transition(parser, raw,
1450                                          STATE_SCI, ACTION_CLEAR);
1451         case 0x9d:                /* OSC */
1452                 return parser_transition(parser, raw,
1453                                          STATE_OSC_STRING, ACTION_OSC_START);
1454         case 0x9b:                /* CSI */
1455                 return parser_transition(parser, raw,
1456                                          STATE_CSI_ENTRY, ACTION_CLEAR_INT_AND_PARAMS);
1457         default:
1458                 return parser_feed_to_state(parser, raw);
1459         }
1460 }
1461 
1462 void
vte_parser_reset(vte_parser_t * parser)1463 vte_parser_reset(vte_parser_t* parser)
1464 {
1465         parser_transition(parser, 0, STATE_GROUND, ACTION_IGNORE);
1466 }
1467 
1468 /*
1469  * vte_parser_set_dispatch_unripe:
1470  * @parser: a #vte_parser_t
1471  * @enable:
1472  *
1473  * Enables or disables dispatch of unripe DCS sequences.
1474  * If enabled, known DCS sequences with the %VTE_DISPATCH_UNRIPE
1475  * flag will be dispatched when the Final character is received,
1476  * instead of when the control string terminator (ST) is received.
1477  * The application handling the unripe DCS sequence may then
1478  * either
1479  * * do nothing; in this case the DCS sequence will be dispatched
1480  *   again when the control string was fully received. Ripe and
1481  *   unripe sequences can be distinguished by the value of
1482  *   parser.seq.st which will be 0 for an unripe sequence and
1483  *   either 0x5c (C0 ST) or 0x9c (C1 ST) for a ripe sequence. Or
1484  * * call vte_parser_ignore_until_st(); in this case the DCS
1485  *   sequence will be ignored until after the ST (or an other
1486  *   character that aborts the control string) has been
1487  *   received; or
1488  * * switch to a different parser (e.g. DECSIXEL) to parse the
1489  *   control string directly on-the-fly. Note that in this case,
1490  *   the subparser should take care to handle C0 and C1 controls
1491  *   the same way as this parser would.
1492  */
1493 void
vte_parser_set_dispatch_unripe(vte_parser_t * parser,bool enable)1494 vte_parser_set_dispatch_unripe(vte_parser_t* parser,
1495                                bool enable)
1496 {
1497         parser->dispatch_unripe = enable;
1498 }
1499 
1500 /*
1501  * vte_parser_ignore_until_st:
1502  * @parser: a #vte_parser_t
1503  *
1504  * When used on an unrip %VTE_SEQ_DCS sequence, makes the
1505  * parser ignore everything until the ST is received (or
1506  * the DCS is aborted by the usual other means).
1507  *
1508  * Note that there is some inconsistencies here:
1509  *
1510  * * SUB aborts the DCS in our parser, but e.g. a DECSIXEL
1511  *   parser will handle it as if 3/15 was received.
1512  *
1513  * * the ST terminating the DCS will be dispatched as an ST
1514  *   sequence, instead of producing an IGNORE sequence
1515  *   (this is easily fixable but would slightly complicate
1516  *   the parser for no actual gain).
1517  */
1518 void
vte_parser_ignore_until_st(vte_parser_t * parser)1519 vte_parser_ignore_until_st(vte_parser_t* parser)
1520 {
1521         switch (parser->state) {
1522         case STATE_DCS_PASS:
1523                 parser_transition_no_action(parser, 0, STATE_DCS_IGNORE);
1524                 break;
1525         default:
1526                 g_assert_not_reached();
1527                 break;
1528         }
1529 }
1530