1 /*
2 * Copyright © 2015 David Herrmann <dh.herrmann@gmail.com>
3 * Copyright © 2017, 2018 Christian Persch
4 *
5 * This library is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this library. If not, see <https://www.gnu.org/licenses/>.
17 */
18
19 #include "config.h"
20
21 #include "parser.hh"
22
23 #include <cstdio>
24 #include <cstring>
25 #include <cstdlib>
26 #include <cerrno>
27
28 #include <glib.h>
29
30 #include "parser-charset-tables.hh"
31
/* _VTE_NOQ(...) entries are only forwarded to _VTE_SEQ(...) when
 * PARSER_INCLUDE_NOP is defined; otherwise they expand to nothing.
 */
#ifdef PARSER_INCLUDE_NOP
#define _VTE_NOQ(...) _VTE_SEQ(__VA_ARGS__)
#else
#define _VTE_NOQ(...)
#endif
37
38 /*
39 * Terminal Parser
40 * This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The
41 * parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST
42 * control sequences and generic escape sequences.
43 * The parser itself does not perform any actions but lets the caller react to
44 * detected sequences.
45 *
46 * This parser is mostly DEC VT100+ compatible; known differences are:
47 *
48 * * DEC only recognises up to 16 parameters; vte up to 32 (and that can be easily
49 * extended)
50 *
51 * * DEC's parameter values range is 0..16384; vte supports 0..65535 (16-bit range).
52 *
 * * When the number of parameters exceeds that number, DEC executes the function
54 * with these parameters, ignoring the excessive parameters; vte ignores the
55 * whole function instead.
56 *
57 * * DEC ignores CSI sequences with colon-separated parameters; vte implements colon-
58 * separated parameters as subparameters (this is an extension taken from ITU-T T.416).
59 *
60 * * DEC executes format effector controls in CSI, OSC, DCS sequences as if the
61 * control was received before the control sequence; vte only does this for CSI
62 * sequences and ignores all controls except ESC and BEL in OSC control strings,
63 * and passes all controls except ESC through to the control string in DCS sequences.
64 *
65 * * DEC only allows ST (either C0 or C1) to terminate OSC strings; vte allows
66 * OSC to be terminated by BEL (this is a deprecated xterm extension).
67 *
68 * * DEC parses ESC Z as DECID, a deprecated function equivalent to DA1; vte
69 * implements ECMA-48's SCI (single character introducer) instead.
70 */
71
72 /*
73 * Command Parser
74 * The ctl-seq parser "vte_parser" only detects whole sequences, it does not
75 * detect the specific command. Once a sequence is parsed, the command-parsers
76 * are used to figure out their meaning.
77 */
78
79 /*
80 * Intermediates (and, for CSI/DCS, the optional parameter character) are
81 * stored efficiently in an unsigned int. Intermediates can be 2/00..2/15,
82 * plus one value for 'no intermediate'; together that fits into 5 bits.
83 * Parameter character can be 'no parameter character', or one from
84 * 3/12..3/15; that fits into 3 bits.
85 *
 * In @seq.intermediates, the nth intermediate is stored with shift n * 5,
87 * plus (for CSI/DCS) an additional shift of 3 for the parameter character
88 * which is stored at bits 0..2.
89 *
90 * VTE_SEQ_PARAMETER(u) extracts the parameter character
91 * of a CSI or DCS sequence
92 * VTE_SEQ_REMOVE_PARAMETER(u) extracts the intermediates
93 * of a CSI or DCS sequence
94 * VTE_SEQ_INTERMEDIATE(u) extracts the first intermediate from an
95 * intermediates value (for CSI/DCS, that must be without parameter
96 * character, see VTE_SEQ_REMOVE_PARAMETER)
97 * VTE_SEQ_REMOVE_INTERMEDIATE(u) extracts the remaining intermediates
98 * after the first one; use VTE_SEQ_INTERMEDIATE on its return value
99 * to extract the 2nd intermediate, and so on
100 */
101
/* Field widths, in bits */
#define VTE_SEQ_INTERMEDIATE_BITS (5)
#define VTE_SEQ_PARAMETER_BITS (3)

/* Masks selecting the lowest field of each kind */
#define VTE_SEQ_INTERMEDIATE_MASK (~(~0U << VTE_SEQ_INTERMEDIATE_BITS))
#define VTE_SEQ_PARAMETER_MASK (~(~0U << VTE_SEQ_PARAMETER_BITS))

/* Parameter character (lowest bits): read it / shift it away */
#define VTE_SEQ_PARAMETER(u) (VTE_SEQ_PARAMETER_MASK & (u))
#define VTE_SEQ_REMOVE_PARAMETER(u) ((u) >> VTE_SEQ_PARAMETER_BITS)

/* First intermediate: read it / shift it away */
#define VTE_SEQ_INTERMEDIATE(u) (VTE_SEQ_INTERMEDIATE_MASK & (u))
#define VTE_SEQ_REMOVE_INTERMEDIATE(u) ((u) >> VTE_SEQ_INTERMEDIATE_BITS)

/* Pack @c together with @s shifted up by VTE_CHARSET_SLOT_OFFSET */
#define VTE_MAKE_CHARSET(c,s) (((s) << VTE_CHARSET_SLOT_OFFSET) | (c))
111
/*
 * _VTE_SEQ_CODE_ESC(final, intermediates):
 *
 * Builds the lookup key for an escape sequence out of its final
 * character and its intermediates.
 *
 * The final character lies in 3/0..7/14 and is rebased to 0, which
 * takes up the low 7 bits. The intermediates go above that, with their
 * least significant bit dropped: the key is only used when the first
 * intermediate is NONE or HASH, so that bit is not needed and the
 * lookup table stays smaller.
 */
#define _VTE_SEQ_CODE_ESC(f,i) (((f) - 0x30) | (((i) >> 1) << 7))
125
/*
 * _VTE_SEQ_CODE_COMBINE(parameter, intermediates)
 *
 * Merges the parameter character (low VTE_SEQ_PARAMETER_BITS bits) and
 * the intermediates (shifted above it) into a single value used when
 * matching a sequence against the known sequences.
 */
#define _VTE_SEQ_CODE_COMBINE(p,i) (((i) << VTE_SEQ_PARAMETER_BITS) | (p))
133
/*
 * _VTE_SEQ_CODE(final, intermediates):
 *
 * Builds the lookup key for a CSI or DCS sequence. The final character
 * lies in 4/0..7/14 and is rebased to 0, which takes up the low 6 bits;
 * the second argument is placed above it. Pass the result of
 * _VTE_SEQ_CODE_COMBINE (parameter character plus intermediates) as
 * the second argument.
 */
#define _VTE_SEQ_CODE(f,i) (((i) << 6) | ((f) - 0x40))
145
/*
 * parser_check_matching_controls:
 * @introducer: either a C1 control, or the final in the equivalent ESC F sequence
 * @terminator: either a C1 control, or the final in the equivalent ESC F sequence
 *
 * Tells whether the OSC/DCS @introducer and the ST @terminator come
 * from the same control set, i.e. both C0 or both C1. The two values
 * match when they agree in bit 7 (0x80).
 *
 * For OSC, a C0 OSC terminated by BEL-as-ST passes this check as well.
 *
 * Returns: true if both values agree in bit 7, false otherwise
 */
static inline bool
parser_check_matching_controls(uint32_t introducer,
                               uint32_t terminator)
{
        uint32_t const high_bit = 0x80;

        return (introducer & high_bit) == (terminator & high_bit);
}
161
162 static unsigned int
vte_parse_host_control(vte_seq_t const * seq)163 vte_parse_host_control(vte_seq_t const* seq)
164 {
165 switch (seq->terminator) {
166 #define _VTE_SEQ(cmd,type,f,pi,ni,i0,flags) case f: return VTE_CMD_##cmd;
167 #include "parser-c01.hh"
168 #undef _VTE_SEQ
169 default: return VTE_CMD_NONE;
170 }
171 }
172
173 /* ECMA-35 § 14.1 specifies that the final character 7/14 always identifies
174 * an empty set. Note that that this does not apply for DRCS sets (§ 14.4),
175 * since § 13.3.3 says that all the Ft (4/0..7/14) bytes are private-use.
176 */
177 static inline constexpr unsigned int
charset_empty_or_none(uint32_t raw)178 charset_empty_or_none(uint32_t raw)
179 {
180 return raw == 0x7e ? VTE_CHARSET_EMPTY : VTE_CHARSET_NONE;
181 }
182
183 static unsigned int
vte_parse_charset_94(uint32_t raw,unsigned int intermediates)184 vte_parse_charset_94(uint32_t raw,
185 unsigned int intermediates)
186 {
187 assert (raw >= 0x30 && raw < 0x7f);
188
189 unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
190
191 switch (VTE_SEQ_INTERMEDIATE(intermediates)) {
192 case VTE_SEQ_INTERMEDIATE_NONE:
193 if (remaining_intermediates == 0 &&
194 raw < (0x30 + G_N_ELEMENTS(charset_graphic_94)))
195 return charset_graphic_94[raw - 0x30];
196 break;
197
198 case VTE_SEQ_INTERMEDIATE_SPACE:
199 return VTE_CHARSET_DRCS;
200
201 case VTE_SEQ_INTERMEDIATE_BANG:
202 if (remaining_intermediates == 0 &&
203 raw >= 0x40 && (raw < 0x40 + G_N_ELEMENTS(charset_graphic_94_with_2_1)))
204 return charset_graphic_94_with_2_1[raw - 0x40];
205 break;
206
207 case VTE_SEQ_INTERMEDIATE_DQUOTE:
208 if (remaining_intermediates == 0 &&
209 raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_with_2_2)))
210 return charset_graphic_94_with_2_2[raw - 0x30];
211 break;
212
213 case VTE_SEQ_INTERMEDIATE_HASH:
214 case VTE_SEQ_INTERMEDIATE_CASH:
215 break;
216
217 case VTE_SEQ_INTERMEDIATE_PERCENT:
218 if (remaining_intermediates == 0 &&
219 raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_with_2_5)))
220 return charset_graphic_94_with_2_5[raw - 0x30];
221 break;
222
223 case VTE_SEQ_INTERMEDIATE_AND:
224 if (remaining_intermediates == 0 &&
225 raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_with_2_6)))
226 return charset_graphic_94_with_2_6[raw - 0x30];
227 break;
228 }
229
230 return charset_empty_or_none(raw);
231 }
232
233 static unsigned int
vte_parse_charset_94_n(uint32_t raw,unsigned int intermediates)234 vte_parse_charset_94_n(uint32_t raw,
235 unsigned int intermediates)
236 {
237 assert (raw >= 0x30 && raw < 0x7f);
238
239 unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
240
241 switch (VTE_SEQ_INTERMEDIATE(intermediates)) {
242 case VTE_SEQ_INTERMEDIATE_NONE:
243 if (remaining_intermediates == 0 &&
244 raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_n)))
245 return charset_graphic_94_n[raw - 0x30];
246 break;
247
248 case VTE_SEQ_INTERMEDIATE_SPACE:
249 return VTE_CHARSET_DRCS;
250
251 case VTE_SEQ_INTERMEDIATE_BANG:
252 if (remaining_intermediates == 0 &&
253 raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_n_with_2_1)))
254 return charset_graphic_94_n_with_2_1[raw - 0x30];
255 break;
256 }
257
258 return charset_empty_or_none(raw);
259 }
260
261 static unsigned int
vte_parse_charset_96(uint32_t raw,unsigned int intermediates)262 vte_parse_charset_96(uint32_t raw,
263 unsigned int intermediates)
264 {
265 assert (raw >= 0x30 && raw < 0x7f);
266
267 unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
268
269 switch (VTE_SEQ_INTERMEDIATE(intermediates)) {
270 case VTE_SEQ_INTERMEDIATE_NONE:
271 if (remaining_intermediates == 0 &&
272 raw < (0x30 + G_N_ELEMENTS(charset_graphic_96)))
273 return charset_graphic_96[raw - 0x30];
274 break;
275
276 case VTE_SEQ_INTERMEDIATE_SPACE:
277 return VTE_CHARSET_DRCS;
278 }
279
280 return charset_empty_or_none(raw);
281 }
282
283 static unsigned int
vte_parse_charset_96_n(uint32_t raw,unsigned int intermediates)284 vte_parse_charset_96_n(uint32_t raw,
285 unsigned int intermediates)
286 {
287 if (VTE_SEQ_INTERMEDIATE(intermediates) == VTE_SEQ_INTERMEDIATE_SPACE)
288 return VTE_CHARSET_DRCS;
289
290 return charset_empty_or_none(raw);
291 }
292
293 static unsigned int
vte_parse_charset_ocs(uint32_t raw,unsigned int intermediates)294 vte_parse_charset_ocs(uint32_t raw,
295 unsigned int intermediates)
296 {
297 assert (raw >= 0x30 && raw < 0x7f);
298
299 unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
300
301 switch (VTE_SEQ_INTERMEDIATE(intermediates)) {
302 case VTE_SEQ_INTERMEDIATE_NONE: /* OCS with standard return */
303 if (remaining_intermediates == 0 &&
304 raw >= 0x30 && raw < (0x30 + G_N_ELEMENTS(charset_ocs)))
305 return charset_ocs[raw - 0x30];
306 break;
307
308 case VTE_SEQ_INTERMEDIATE_SPACE: /* OCS with standard return */
309 if (remaining_intermediates == 0 &&
310 raw >= 0x30 && raw < (0x30 + G_N_ELEMENTS(charset_ocs_with_2_0)))
311 return charset_ocs_with_2_0[raw - 0x30];
312 break;
313
314 case VTE_SEQ_INTERMEDIATE_BANG ... VTE_SEQ_INTERMEDIATE_DOT: /* OCS with standard return */
315 break;
316
317 case VTE_SEQ_INTERMEDIATE_SLASH: /* OCS without standard return */
318 if (remaining_intermediates == 0 &&
319 raw >= 0x40 && raw < (0x40 + G_N_ELEMENTS(charset_ocs_with_2_15)))
320 return charset_ocs_with_2_15[raw - 0x40];
321 break;
322 }
323
324 return VTE_CHARSET_NONE;
325 }
326
327 static unsigned int
vte_parse_charset_control(uint32_t raw,unsigned int intermediates)328 vte_parse_charset_control(uint32_t raw,
329 unsigned int intermediates)
330 {
331 assert (raw >= 0x30 && raw < 0x7f);
332
333 unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
334
335 switch (VTE_SEQ_INTERMEDIATE(intermediates)) {
336 case VTE_SEQ_INTERMEDIATE_BANG: /* C0 controls */
337 if (remaining_intermediates == 0 &&
338 raw >= 0x40 && raw < (0x40 + G_N_ELEMENTS(charset_control_c0)))
339 return charset_control_c0[raw - 0x40];
340 break;
341
342 case VTE_SEQ_INTERMEDIATE_DQUOTE: /* C1 controls */
343 if (remaining_intermediates == 0 &&
344 raw >= 0x40 && raw < (0x40 + G_N_ELEMENTS(charset_control_c1)))
345 return charset_control_c1[raw - 0x40];
346 break;
347 }
348
349 return charset_empty_or_none(raw);
350 }
351
/*
 * vte_parse_host_escape:
 * @seq: the sequence; seq->terminator and seq->intermediates are read
 * @cs_out: location to store the designated charset
 *
 * Identifies the command of an escape sequence, dispatching on its
 * first intermediate.
 *
 * @cs_out is written only for the designation commands: for
 * VTE_CMD_CnD, VTE_CMD_GnDMm and VTE_CMD_GnDm it receives the charset
 * combined with its slot through VTE_MAKE_CHARSET; for VTE_CMD_DOCS it
 * receives the plain result of vte_parse_charset_ocs(). It is left
 * untouched on every other path.
 *
 * Returns: a VTE_CMD_* value, VTE_CMD_NONE for unrecognised sequences
 */
static unsigned int
vte_parse_host_escape(vte_seq_t const* seq,
                      unsigned int *cs_out)
{
        unsigned int intermediates = seq->intermediates;
        unsigned int intermediate0 = VTE_SEQ_INTERMEDIATE(intermediates);

        /* Switch on the first intermediate */
        switch (intermediate0) {
        case VTE_SEQ_INTERMEDIATE_NONE:
        case VTE_SEQ_INTERMEDIATE_HASH: { /* Single control functions */
                /* The case labels are generated from the entries of parser-esc.hh */
                switch (_VTE_SEQ_CODE_ESC(seq->terminator, intermediates)) {
#define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
                        case _VTE_SEQ_CODE_ESC(f, VTE_SEQ_INTERMEDIATE_##i): return VTE_CMD_##cmd;
#include "parser-esc.hh"
#undef _VTE_SEQ
                default: return VTE_CMD_NONE;
                }
                break;
        }

        case VTE_SEQ_INTERMEDIATE_SPACE: /* Announce code structure */
                if (VTE_SEQ_REMOVE_INTERMEDIATE(intermediates) == 0)
                        return VTE_CMD_ACS;
                break;

        case VTE_SEQ_INTERMEDIATE_BANG: /* C0-designate */
        case VTE_SEQ_INTERMEDIATE_DQUOTE: /* C1-designate */
                /* The slot is the offset of the intermediate from BANG */
                *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_control(seq->terminator, intermediates),
                                           intermediate0 - VTE_SEQ_INTERMEDIATE_BANG);
                return VTE_CMD_CnD;

        case VTE_SEQ_INTERMEDIATE_CASH: { /* Designate multi-byte character sets */
                unsigned int remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(intermediates);
                unsigned int intermediate1 = VTE_SEQ_INTERMEDIATE(remaining_intermediates);
                remaining_intermediates = VTE_SEQ_REMOVE_INTERMEDIATE(remaining_intermediates);

                /* Check the 2nd intermediate */
                switch (intermediate1) {
                case VTE_SEQ_INTERMEDIATE_NONE:
                        /* For compatibility with an earlier version of ISO-2022,
                         * ESC 2/4 4/0, ESC 2/4 4/1 and ESC 2/4 4/2 designate G0
                         * sets (i.e., without the 2/8 as 2nd intermediate byte).
                         */
                        switch (seq->terminator) {
                        case '@':
                        case 'A':
                        case 'B': /* G0-designate multibyte charset */
                                *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_94_n(seq->terminator,
                                                                                  remaining_intermediates),
                                                           0);
                                return VTE_CMD_GnDMm;
                        }
                        break;

                case VTE_SEQ_INTERMEDIATE_POPEN:  /* G0-designate 94^n-set */
                case VTE_SEQ_INTERMEDIATE_PCLOSE: /* G1-designate 94^n-set */
                case VTE_SEQ_INTERMEDIATE_MULT:   /* G2-designate 94^n-set */
                case VTE_SEQ_INTERMEDIATE_PLUS:   /* G3-designate 94^n-set */
                        /* The slot is the offset of the 2nd intermediate from POPEN */
                        *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_94_n(seq->terminator,
                                                                          remaining_intermediates),
                                                   intermediate1 - VTE_SEQ_INTERMEDIATE_POPEN);
                        return VTE_CMD_GnDMm;

                case VTE_SEQ_INTERMEDIATE_COMMA: /* Reserved for future standardisation */
                        break;

                case VTE_SEQ_INTERMEDIATE_MINUS: /* G1-designate 96^n-set */
                case VTE_SEQ_INTERMEDIATE_DOT:   /* G2-designate 96^n-set */
                case VTE_SEQ_INTERMEDIATE_SLASH: /* G3-designate 96^n-set */
                        /* The slot is the offset of the 2nd intermediate from COMMA */
                        *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_96_n(seq->terminator,
                                                                          remaining_intermediates),
                                                   intermediate1 - VTE_SEQ_INTERMEDIATE_COMMA);
                        return VTE_CMD_GnDMm;
                }
                break;
        }

        case VTE_SEQ_INTERMEDIATE_PERCENT: /* Designate other coding system */
                /* Stored as is, not combined with a slot */
                *cs_out = vte_parse_charset_ocs(seq->terminator,
                                                VTE_SEQ_REMOVE_INTERMEDIATE(intermediates));
                return VTE_CMD_DOCS;

        case VTE_SEQ_INTERMEDIATE_AND: /* Identify revised registration */
                if (VTE_SEQ_REMOVE_INTERMEDIATE(intermediates) == 0)
                        return VTE_CMD_IRR;
                break;

        case VTE_SEQ_INTERMEDIATE_SQUOTE: /* Reserved for future standardisation */
                break;

        case VTE_SEQ_INTERMEDIATE_POPEN:  /* G0-designate 94-set */
        case VTE_SEQ_INTERMEDIATE_PCLOSE: /* G1-designate 94-set */
        case VTE_SEQ_INTERMEDIATE_MULT:   /* G2-designate 94-set */
        case VTE_SEQ_INTERMEDIATE_PLUS:   /* G3-designate 94-set */
                /* The slot is the offset of the intermediate from POPEN */
                *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_94(seq->terminator,
                                                                VTE_SEQ_REMOVE_INTERMEDIATE(intermediates)),
                                           intermediate0 - VTE_SEQ_INTERMEDIATE_POPEN);
                return VTE_CMD_GnDm;

        case VTE_SEQ_INTERMEDIATE_COMMA: /* Reserved for future standardisation */
                break;

        case VTE_SEQ_INTERMEDIATE_MINUS: /* G1-designate 96-set */
        case VTE_SEQ_INTERMEDIATE_DOT:   /* G2-designate 96-set */
        case VTE_SEQ_INTERMEDIATE_SLASH: /* G3-designate 96-set */
                /* The slot is the offset of the intermediate from COMMA */
                *cs_out = VTE_MAKE_CHARSET(vte_parse_charset_96(seq->terminator,
                                                                VTE_SEQ_REMOVE_INTERMEDIATE(intermediates)),
                                           intermediate0 - VTE_SEQ_INTERMEDIATE_COMMA);
                return VTE_CMD_GnDm;
        }

        return VTE_CMD_NONE;
}
466
467 static unsigned int
vte_parse_host_csi(vte_seq_t const * seq)468 vte_parse_host_csi(vte_seq_t const* seq)
469 {
470 switch (_VTE_SEQ_CODE(seq->terminator, seq->intermediates)) {
471 #define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
472 case _VTE_SEQ_CODE(f, _VTE_SEQ_CODE_COMBINE(VTE_SEQ_PARAMETER_##p, VTE_SEQ_INTERMEDIATE_##i)): return VTE_CMD_##cmd;
473 #include "parser-csi.hh"
474 #undef _VTE_SEQ
475 default: return VTE_CMD_NONE;
476 }
477 }
478
479 static unsigned int
vte_parse_host_dcs(vte_seq_t const * seq,unsigned int * flagsptr)480 vte_parse_host_dcs(vte_seq_t const* seq,
481 unsigned int* flagsptr)
482 {
483 switch (_VTE_SEQ_CODE(seq->terminator, seq->intermediates)) {
484 #define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
485 case _VTE_SEQ_CODE(f, _VTE_SEQ_CODE_COMBINE(VTE_SEQ_PARAMETER_##p, VTE_SEQ_INTERMEDIATE_##i)): *flagsptr = flags; return VTE_CMD_##cmd;
486 #include "parser-dcs.hh"
487 #undef _VTE_SEQ
488 default: return VTE_CMD_NONE;
489 }
490 }
491
492 static unsigned int
vte_parse_host_sci(vte_seq_t const * seq)493 vte_parse_host_sci(vte_seq_t const* seq)
494 {
495 switch (_VTE_SEQ_CODE(seq->terminator, 0)) {
496 #define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
497 case _VTE_SEQ_CODE(f, 0): return VTE_CMD_##cmd;
498 #include "parser-sci.hh"
499 #undef _VTE_SEQ
500 default: return VTE_CMD_NONE;
501 }
502 }
503
504 /*
505 * State Machine
506 * This parser controls the parser-state and returns any detected sequence to
507 * the caller. The parser is based on this state-diagram from Paul Williams:
508 * https://vt100.net/emu/
509 * It was written from scratch and extended where needed.
510 * This parser is fully compatible up to the vt500 series. We expect UCS-4 as
 * input. It's the caller's responsibility to do any UTF-8 parsing.
512 */
513
/* States of the parser state machine. The enumerators carry no explicit
 * values, so they number consecutively from 0 in declaration order.
 */
enum parser_state_t {
        STATE_GROUND,           /* initial state and ground */
        STATE_DCS_PASS_ESC,     /* ESC after DCS which may be ESC \ aka C0 ST */
        STATE_OSC_STRING_ESC,   /* ESC after OSC which may be ESC \ aka C0 ST */
        STATE_ESC,              /* ESC sequence was started */
        STATE_ESC_INT,          /* intermediate escape characters */
        STATE_CSI_ENTRY,        /* starting CSI sequence */
        STATE_CSI_PARAM,        /* CSI parameters */
        STATE_CSI_INT,          /* intermediate CSI characters */
        STATE_CSI_IGNORE,       /* CSI error; ignore this CSI sequence */
        STATE_DCS_ENTRY,        /* starting DCS sequence */
        STATE_DCS_PARAM,        /* DCS parameters */
        STATE_DCS_INT,          /* intermediate DCS characters */
        STATE_DCS_PASS,         /* DCS data passthrough */
        STATE_DCS_IGNORE,       /* DCS error; ignore this DCS sequence */
        STATE_OSC_STRING,       /* parsing OSC sequence */
        STATE_ST_IGNORE,        /* unimplemented seq; ignore until ST */
        STATE_SCI,              /* single character introducer sequence was started */

        STATE_N,                /* not a state: equals the number of states above */
};
535
536 /* Parser state transitioning */
537
/* Signature shared by all parser actions: they receive the parser and
 * the current character, and return an int (the actions in this file
 * return VTE_SEQ_NONE or the type of the sequence they completed).
 */
typedef int (* parser_action_func)(vte_parser_t* parser, uint32_t raw);
539
540 // FIXMEchpe: I get weird performance results here from
541 // either not inlining, inlining these function or the
542 // macros below. Sometimes (after a recompile) one is
543 // (as much as 50%!) slower, sometimes the other one etc. ‽
544
545 #if 1 // (inline) functions
546
547 // #define PTINLINE inline
548 #define PTINLINE
549
550 /* nop */
551 static PTINLINE int
parser_nop(vte_parser_t * parser,uint32_t raw)552 parser_nop(vte_parser_t* parser,
553 uint32_t raw)
554 {
555 return VTE_SEQ_NONE;
556 }
557 /* dispatch related actions */
558 static PTINLINE int
parser_action(vte_parser_t * parser,uint32_t raw,parser_action_func action)559 parser_action(vte_parser_t* parser,
560 uint32_t raw,
561 parser_action_func action)
562 {
563 return action(parser, raw);
564 }
565
566 /* perform state transition */
567 static PTINLINE int
parser_transition_no_action(vte_parser_t * parser,uint32_t raw,unsigned int state)568 parser_transition_no_action(vte_parser_t* parser,
569 uint32_t raw,
570 unsigned int state)
571 {
572 parser->state = state;
573 return VTE_SEQ_NONE;
574 }
575
576 /* perform state transition and dispatch related actions */
577 static PTINLINE int
parser_transition(vte_parser_t * parser,uint32_t raw,unsigned int state,parser_action_func action)578 parser_transition(vte_parser_t* parser,
579 uint32_t raw,
580 unsigned int state,
581 parser_action_func action)
582 {
583 parser->state = state;
584
585 return action(parser, raw);
586 }
587
588 #undef PTINLINE
589
590 #else // macros
591
/* Macro variants of the helpers above. They use GNU statement
 * expressions, so each one yields the value of its last statement.
 */

/* nop: yields VTE_SEQ_NONE; the arguments are not evaluated */
#define parser_nop(parser,raw) \
        ({ VTE_SEQ_NONE; })

/* dispatch related actions: yields the result of a(p, r).
 * @a is deliberately not parenthesised, so that it may itself be one
 * of the function-like macros defined here (e.g. parser_nop).
 */
#define parser_action(p,r,a) \
        ({ \
                a((p), (r)); \
        })

/* perform state transition: stores @s in (p)->state and yields
 * VTE_SEQ_NONE. The state is written through the macro argument @p,
 * not through whatever variable named 'parser' happens to be in scope
 * at the call site.
 */
#define parser_transition_no_action(p,r,s) \
        ({ \
                (p)->state = (s); \
                VTE_SEQ_NONE; \
        })

/* perform state transition and dispatch related actions: stores @s in
 * (p)->state, then yields the result of a(p, r)
 */
#define parser_transition(p,r,s,a) \
        ({ \
                (p)->state = (s); \
                a((p), (r)); \
        })
615
616 #endif // (inline) functions or macros
617
618 /**
619 * vte_parser_init() - Initialise parser object
620 * @parser: the struct vte_parser
621 */
622 void
vte_parser_init(vte_parser_t * parser)623 vte_parser_init(vte_parser_t* parser)
624 {
625 memset(parser, 0, sizeof(*parser));
626 vte_seq_string_init(&parser->seq.arg_str);
627 }
628
629 /**
630 * vte_parser_deinit() - Deinitialises parser object
631 * @parser: parser object to deinitialise
632 */
633 void
vte_parser_deinit(vte_parser_t * parser)634 vte_parser_deinit(vte_parser_t* parser)
635 {
636 vte_seq_string_free(&parser->seq.arg_str);
637 }
638
639 static inline int
parser_clear(vte_parser_t * parser,uint32_t raw)640 parser_clear(vte_parser_t* parser,
641 uint32_t raw)
642 {
643 /* seq.command is set when the sequence is executed,
644 * seq.terminator is set when the final character is received,
645 * and seq.introducer is set when the introducer is received,
646 * and all this happens before the sequence is dispatched.
647 * Therefore these fiedls need not be cleared in any case.
648 */
649 return VTE_SEQ_NONE;
650 }
651
652 static inline int
parser_clear_int(vte_parser_t * parser,uint32_t raw)653 parser_clear_int(vte_parser_t* parser,
654 uint32_t raw)
655 {
656 parser->seq.intermediates = 0;
657 parser->seq.n_intermediates = 0;
658
659 return parser_clear(parser, raw);
660 }
661
662 static inline int
parser_clear_params(vte_parser_t * parser,uint32_t raw)663 parser_clear_params(vte_parser_t* parser,
664 uint32_t raw)
665 {
666 /* The (n_args+1)th parameter may have been started but not
667 * finialised, so it needs cleaning too. All further params
668 * have not been touched, so need not be cleaned.
669 */
670 unsigned int n_args = G_UNLIKELY(parser->seq.n_args >= VTE_PARSER_ARG_MAX)
671 ? VTE_PARSER_ARG_MAX
672 : parser->seq.n_args + 1;
673 memset(parser->seq.args, 0, n_args * sizeof(parser->seq.args[0]));
674 #ifdef PARSER_EXTRA_CLEAN
675 /* Assert that the assumed-clean params are actually clean. */
676 for (unsigned int n = n_args; n < VTE_PARSER_ARG_MAX; ++n)
677 g_assert_cmpuint(parser->seq.args[n], ==, VTE_SEQ_ARG_INIT_DEFAULT);
678 #endif
679
680 parser->seq.n_args = 0;
681 parser->seq.n_final_args = 0;
682
683 return VTE_SEQ_NONE;
684 }
685
686 static inline int
parser_clear_int_and_params(vte_parser_t * parser,uint32_t raw)687 parser_clear_int_and_params(vte_parser_t* parser,
688 uint32_t raw)
689 {
690 parser_clear_int(parser, raw);
691 return parser_clear_params(parser, raw);
692 }
693
694 static int
parser_ignore(vte_parser_t * parser,uint32_t raw)695 parser_ignore(vte_parser_t* parser,
696 uint32_t raw)
697 {
698 parser->seq.type = VTE_SEQ_IGNORE;
699 parser->seq.command = VTE_CMD_NONE;
700 parser->seq.terminator = raw;
701
702 return parser->seq.type;
703 }
704
705 static int
parser_print(vte_parser_t * parser,uint32_t raw)706 parser_print(vte_parser_t* parser,
707 uint32_t raw)
708 {
709 parser->seq.type = VTE_SEQ_GRAPHIC;
710 parser->seq.command = VTE_CMD_GRAPHIC;
711 parser->seq.terminator = raw;
712
713 return parser->seq.type;
714 }
715
716 static int
parser_execute(vte_parser_t * parser,uint32_t raw)717 parser_execute(vte_parser_t* parser,
718 uint32_t raw)
719 {
720 parser->seq.type = VTE_SEQ_CONTROL;
721 parser->seq.terminator = raw;
722 parser->seq.command = vte_parse_host_control(&parser->seq);
723
724 return parser->seq.type;
725 }
726
727 static int
parser_collect_esc(vte_parser_t * parser,uint32_t raw)728 parser_collect_esc(vte_parser_t* parser,
729 uint32_t raw)
730 {
731 assert(raw >= 0x20 && raw <= 0x2f);
732
733 /* ESCAPE sequences only have intermediates or 2/0..2/15, so there's no
734 * need for the extra shift as below for CSI/DCS sequences
735 */
736 parser->seq.intermediates |= (VTE_SEQ_MAKE_INTERMEDIATE(raw) << (VTE_SEQ_INTERMEDIATE_BITS * parser->seq.n_intermediates++));
737
738 return VTE_SEQ_NONE;
739 }
740
741 static int
parser_collect_csi(vte_parser_t * parser,uint32_t raw)742 parser_collect_csi(vte_parser_t* parser,
743 uint32_t raw)
744 {
745 assert(raw >= 0x20 && raw <= 0x2f);
746
747 /* In addition to 2/0..2/15 intermediates, CSI/DCS sequence
748 * can also have one parameter byte 3/12..3/15 at the
749 * start of the parameters (see parser_collect_parameter below);
750 * that's what the extra shift is for.
751 */
752 parser->seq.intermediates |= (VTE_SEQ_MAKE_INTERMEDIATE(raw) << (VTE_SEQ_PARAMETER_BITS +
753 VTE_SEQ_INTERMEDIATE_BITS * parser->seq.n_intermediates++));
754
755 return VTE_SEQ_NONE;
756 }
757
758 static int
parser_collect_parameter(vte_parser_t * parser,uint32_t raw)759 parser_collect_parameter(vte_parser_t* parser,
760 uint32_t raw)
761 {
762 assert(raw >= 0x3c && raw <= 0x3f);
763
764 /* CSI/DCS may optionally have one parameter byte from 3/12..3/15
765 * at the start of the parameters; we put that into the lowest
766 * part of @seq.intermediates.
767 * Note that there can only be *one* such byte; the state machine
768 * already enforces that, so we do not need any additional checks
769 * here.
770 */
771 parser->seq.intermediates |= VTE_SEQ_MAKE_PARAMETER(raw);
772
773 return VTE_SEQ_NONE;
774 }
775
776 static void
parser_params_overflow(vte_parser_t * parser,uint32_t raw)777 parser_params_overflow(vte_parser_t* parser,
778 uint32_t raw)
779 {
780 /* An overflow of the parameter number can only happen in
781 * STATE_{CSI,DCS}_PARAM, and it occurs when
782 * seq.n_arg == VTE_PARSER_ARG_MAX, and either an 0…9
783 * is encountered, starting the next param, or an
784 * explicit ':' or ';' terminating a (defaulted) (sub)param,
785 * or when the intermediates/final character(s) occur
786 * after a defaulted (sub)param.
787 *
788 * Transition to STATE_{CSI,DCS}_IGNORE to ignore the
789 * whole sequence.
790 */
791 parser_transition_no_action(parser,
792 raw,
793 parser->state == STATE_CSI_PARAM ?
794 STATE_CSI_IGNORE : STATE_DCS_IGNORE);
795 }
796
797 /* The next two functions are only called when encountering a ';' or ':',
798 * so if there's already MAX-1 parameters, the ';' or ':' would finish
799 * the MAXth parameter and there would be a default or non-default
800 * MAX+1th parameter following it.
801 */
802 static int
parser_finish_param(vte_parser_t * parser,uint32_t raw)803 parser_finish_param(vte_parser_t* parser,
804 uint32_t raw)
805 {
806 if (G_LIKELY(parser->seq.n_args < VTE_PARSER_ARG_MAX - 1)) {
807 vte_seq_arg_finish(&parser->seq.args[parser->seq.n_args], false);
808 ++parser->seq.n_args;
809 ++parser->seq.n_final_args;
810 } else
811 parser_params_overflow(parser, raw);
812
813 return VTE_SEQ_NONE;
814 }
815
816 static int
parser_finish_subparam(vte_parser_t * parser,uint32_t raw)817 parser_finish_subparam(vte_parser_t* parser,
818 uint32_t raw)
819 {
820 if (G_LIKELY(parser->seq.n_args < VTE_PARSER_ARG_MAX - 1)) {
821 vte_seq_arg_finish(&parser->seq.args[parser->seq.n_args], true);
822 ++parser->seq.n_args;
823 } else
824 parser_params_overflow(parser, raw);
825
826 return VTE_SEQ_NONE;
827 }
828
829 static int
parser_param(vte_parser_t * parser,uint32_t raw)830 parser_param(vte_parser_t* parser,
831 uint32_t raw)
832 {
833 /* assert(raw >= '0' && raw <= '9'); */
834
835 if (G_LIKELY(parser->seq.n_args < VTE_PARSER_ARG_MAX))
836 vte_seq_arg_push(&parser->seq.args[parser->seq.n_args], raw);
837 else
838 parser_params_overflow(parser, raw);
839
840 return VTE_SEQ_NONE;
841 }
842
843 static inline int
parser_osc_start(vte_parser_t * parser,uint32_t raw)844 parser_osc_start(vte_parser_t* parser,
845 uint32_t raw)
846 {
847 parser_clear(parser, raw);
848
849 vte_seq_string_reset(&parser->seq.arg_str);
850
851 parser->seq.introducer = raw;
852 return VTE_SEQ_NONE;
853 }
854
855 static int
parser_osc_collect(vte_parser_t * parser,uint32_t raw)856 parser_osc_collect(vte_parser_t* parser,
857 uint32_t raw)
858 {
859 /*
860 * Only characters from 0x20..0x7e and >= 0xa0 are allowed here.
861 * Our state-machine already verifies those restrictions.
862 */
863
864 if (G_UNLIKELY(!vte_seq_string_push(&parser->seq.arg_str, raw)))
865 parser->state = STATE_ST_IGNORE;
866
867 return VTE_SEQ_NONE;
868 }
869
870 static int
parser_dcs_start(vte_parser_t * parser,uint32_t raw)871 parser_dcs_start(vte_parser_t* parser,
872 uint32_t raw)
873 {
874 parser_clear_int_and_params(parser, raw);
875
876 vte_seq_string_reset(&parser->seq.arg_str);
877
878 parser->seq.introducer = raw;
879 return VTE_SEQ_NONE;
880 }
881
882 static int
parser_dcs_consume(vte_parser_t * parser,uint32_t raw)883 parser_dcs_consume(vte_parser_t* parser,
884 uint32_t raw)
885 {
886 /* parser->seq is cleared during DCS-START state, thus there's no need
887 * to clear invalid fields here. */
888
889 if (G_LIKELY(parser->seq.n_args < VTE_PARSER_ARG_MAX)) {
890 if (parser->seq.n_args > 0 ||
891 vte_seq_arg_started(parser->seq.args[parser->seq.n_args])) {
892 vte_seq_arg_finish(&parser->seq.args[parser->seq.n_args], false);
893 ++parser->seq.n_args;
894 ++parser->seq.n_final_args;
895 }
896 }
897
898 parser->seq.type = VTE_SEQ_DCS;
899 parser->seq.terminator = raw;
900 parser->seq.st = 0;
901
902 auto flags = unsigned{};
903 parser->seq.command = vte_parse_host_dcs(&parser->seq, &flags);
904
905 return (flags & VTE_DISPATCH_UNRIPE) && parser->dispatch_unripe ? VTE_SEQ_DCS : VTE_SEQ_NONE;
906 }
907
908 static int
parser_dcs_collect(vte_parser_t * parser,uint32_t raw)909 parser_dcs_collect(vte_parser_t* parser,
910 uint32_t raw)
911 {
912 if (G_UNLIKELY(!vte_seq_string_push(&parser->seq.arg_str, raw)))
913 parser->state = STATE_DCS_IGNORE;
914
915 return VTE_SEQ_NONE;
916 }
917
918 static int
parser_esc(vte_parser_t * parser,uint32_t raw)919 parser_esc(vte_parser_t* parser,
920 uint32_t raw)
921 {
922 parser->seq.type = VTE_SEQ_ESCAPE;
923 parser->seq.terminator = raw;
924 parser->seq.charset = VTE_CHARSET_NONE;
925 parser->seq.command = vte_parse_host_escape(&parser->seq,
926 &parser->seq.charset);
927
928 return parser->seq.type;
929 }
930
931 static int
parser_csi(vte_parser_t * parser,uint32_t raw)932 parser_csi(vte_parser_t* parser,
933 uint32_t raw)
934 {
935 /* parser->seq is cleared during CSI-ENTER state, thus there's no need
936 * to clear invalid fields here. */
937
938 if (G_LIKELY(parser->seq.n_args < VTE_PARSER_ARG_MAX)) {
939 if (parser->seq.n_args > 0 ||
940 vte_seq_arg_started(parser->seq.args[parser->seq.n_args])) {
941 vte_seq_arg_finish(&parser->seq.args[parser->seq.n_args], false);
942 ++parser->seq.n_args;
943 ++parser->seq.n_final_args;
944 }
945 }
946
947 parser->seq.type = VTE_SEQ_CSI;
948 parser->seq.terminator = raw;
949 parser->seq.command = vte_parse_host_csi(&parser->seq);
950
951 return parser->seq.type;
952 }
953
954 static int
parser_osc(vte_parser_t * parser,uint32_t raw)955 parser_osc(vte_parser_t* parser,
956 uint32_t raw)
957 {
958 /* parser->seq is cleared during OSC_START state, thus there's no need
959 * to clear invalid fields here. */
960
961 vte_seq_string_finish(&parser->seq.arg_str);
962
963 /* We only dispatch a DCS if the introducer and string
964 * terminator are from the same control set, i.e. both
965 * C0 or both C1; we discard sequences with mixed controls.
966 */
967 if (!parser_check_matching_controls(parser->seq.introducer, raw))
968 return VTE_SEQ_IGNORE;
969
970 parser->seq.type = VTE_SEQ_OSC;
971 parser->seq.command = VTE_CMD_OSC;
972 parser->seq.st = raw;
973
974 return parser->seq.type;
975 }
976
977 static int
parser_dcs(vte_parser_t * parser,uint32_t raw)978 parser_dcs(vte_parser_t* parser,
979 uint32_t raw)
980 {
981 /* Most of parser->seq was already filled in parser_dcs_consume() */
982 parser->seq.st = raw;
983
984 vte_seq_string_finish(&parser->seq.arg_str);
985
986 /* We only dispatch a DCS if the introducer and string
987 * terminator are from the same control set, i.e. both
988 * C0 or both C1; we discard sequences with mixed controls.
989 */
990 if (!parser_check_matching_controls(parser->seq.introducer, raw))
991 return VTE_SEQ_IGNORE;
992
993 return parser->seq.type;
994 }
995
996 static int
parser_sci(vte_parser_t * parser,uint32_t raw)997 parser_sci(vte_parser_t* parser,
998 uint32_t raw)
999 {
1000 parser->seq.type = VTE_SEQ_SCI;
1001 parser->seq.terminator = raw;
1002 parser->seq.command = vte_parse_host_sci(&parser->seq);
1003
1004 return parser->seq.type;
1005 }
1006
/*
 * Names of the state-machine actions, each mapped to the function that
 * implements it. The state machine below passes these names to
 * parser_action() and parser_transition().
 *
 * Note that ACTION_COLLECT_DCS is an alias of ACTION_COLLECT_CSI: DCS
 * intermediates are collected by the same handler as CSI ones.
 */
#define ACTION_CLEAR                 parser_clear
#define ACTION_CLEAR_INT             parser_clear_int
#define ACTION_CLEAR_INT_AND_PARAMS  parser_clear_int_and_params
#define ACTION_CLEAR_PARAMS_ONLY     parser_clear_params
#define ACTION_IGNORE                parser_ignore
#define ACTION_PRINT                 parser_print
#define ACTION_EXECUTE               parser_execute
#define ACTION_COLLECT_ESC           parser_collect_esc
#define ACTION_COLLECT_CSI           parser_collect_csi
#define ACTION_COLLECT_DCS           ACTION_COLLECT_CSI
#define ACTION_COLLECT_PARAMETER     parser_collect_parameter
#define ACTION_PARAM                 parser_param
#define ACTION_FINISH_PARAM          parser_finish_param
#define ACTION_FINISH_SUBPARAM       parser_finish_subparam
#define ACTION_ESC_DISPATCH          parser_esc
#define ACTION_CSI_DISPATCH          parser_csi
#define ACTION_DCS_START             parser_dcs_start
#define ACTION_DCS_CONSUME           parser_dcs_consume
#define ACTION_DCS_COLLECT           parser_dcs_collect
#define ACTION_DCS_DISPATCH          parser_dcs
#define ACTION_OSC_START             parser_osc_start
#define ACTION_OSC_COLLECT           parser_osc_collect
#define ACTION_OSC_DISPATCH          parser_osc
#define ACTION_SCI_DISPATCH          parser_sci
1031
/*
 * parser_feed_to_state:
 * @parser: the parser
 * @raw: the character to process
 *
 * The per-state half of the state machine. Depending on the current
 * parser->state, @raw is matched against character ranges and the
 * parser either runs an action while staying in the same state
 * (parser_action), moves to another state with or without running an
 * action (parser_transition, parser_transition_no_action), or passes
 * the character to parser_nop.
 *
 * vte_parser_feed() handles CAN, SUB, DEL and the C1 controls other
 * than ST itself, and forwards everything else to this function.
 *
 * Returns: whatever the selected helper returns (presumably a
 *   VTE_SEQ_* value; the helpers are defined outside this function).
 */
static int
parser_feed_to_state(vte_parser_t* parser,
                     uint32_t raw)
{
        switch (parser->state) {
        case STATE_GROUND:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                case 0x80 ... 0x9f:        /* C1 */
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                }

                /* Everything else is printable */
                return parser_action(parser, raw,
                                     ACTION_PRINT);

        /* An ESC was received inside a DCS or OSC string. If it is
         * followed by '\' (together forming the C0 ST), the string is
         * terminated and dispatched; otherwise the character is handled
         * as the start of a new escape sequence.
         */
        case STATE_DCS_PASS_ESC:
        case STATE_OSC_STRING_ESC:
                if (raw == 0x5c /* '\' */) {
                        switch (parser->state) {
                        case STATE_DCS_PASS_ESC:
                                return parser_transition(parser, raw, STATE_GROUND,
                                                         ACTION_DCS_DISPATCH);
                        case STATE_OSC_STRING_ESC:
                                return parser_transition(parser, raw, STATE_GROUND,
                                                         ACTION_OSC_DISPATCH);
                        }
                }

                /* Do the deferred clear and fallthrough to STATE_ESC */
                parser_transition(parser, 0x1b /* ESC */, STATE_ESC,
                                  ACTION_CLEAR_INT);

                [[fallthrough]];
        case STATE_ESC:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_transition(parser, raw, STATE_ESC_INT,
                                                 ACTION_COLLECT_ESC);
                case 0x30 ... 0x4f:        /* ['0' - '~'] \ */
                case 0x51 ... 0x57:        /* { 'P', 'X', 'Z', '[', ']', '^', '_' } */
                case 0x59:
                case 0x5c:
                case 0x60 ... 0x7e:
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_ESC_DISPATCH);
                case 0x50:                /* 'P' */
                        return parser_transition(parser, raw, STATE_DCS_ENTRY,
                                                 ACTION_DCS_START);
                case 0x5a:                /* 'Z' */
                        return parser_transition(parser, raw, STATE_SCI,
                                                 ACTION_CLEAR);
                case 0x5b:                /* '[' */
                        return parser_transition(parser, raw, STATE_CSI_ENTRY,
                                                 ACTION_CLEAR_PARAMS_ONLY
                                                 /* rest already cleaned on ESC state entry */);
                case 0x5d:                /* ']' */
                        return parser_transition(parser, raw, STATE_OSC_STRING,
                                                 ACTION_OSC_START);
                case 0x58:                /* 'X' */
                case 0x5e:                /* '^' */
                case 0x5f:                /* '_' */
                        return parser_transition_no_action(parser, raw, STATE_ST_IGNORE);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                /* Anything else aborts the escape sequence */
                return parser_transition(parser, raw, STATE_GROUND,
                                         ACTION_IGNORE);
        case STATE_ESC_INT:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_action(parser, raw,
                                             ACTION_COLLECT_ESC);
                case 0x30 ... 0x7e:        /* ['0' - '~'] */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_ESC_DISPATCH);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition(parser, raw, STATE_GROUND,
                                         ACTION_IGNORE);
        case STATE_CSI_ENTRY:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_transition(parser, raw, STATE_CSI_INT,
                                                 ACTION_COLLECT_CSI);
                case 0x30 ... 0x39:        /* ['0' - '9'] */
                        return parser_transition(parser, raw, STATE_CSI_PARAM,
                                                 ACTION_PARAM);
                case 0x3a:                /* ':' */
                        return parser_transition(parser, raw, STATE_CSI_PARAM,
                                                 ACTION_FINISH_SUBPARAM);
                case 0x3b:                /* ';' */
                        return parser_transition(parser, raw, STATE_CSI_PARAM,
                                                 ACTION_FINISH_PARAM);
                case 0x3c ... 0x3f:        /* ['<' - '?'] */
                        return parser_transition(parser, raw, STATE_CSI_PARAM,
                                                 ACTION_COLLECT_PARAMETER);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_CSI_DISPATCH);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition_no_action(parser, raw, STATE_CSI_IGNORE);
        case STATE_CSI_PARAM:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_transition(parser, raw, STATE_CSI_INT,
                                                 ACTION_COLLECT_CSI);
                case 0x30 ... 0x39:        /* ['0' - '9'] */
                        return parser_action(parser, raw,
                                             ACTION_PARAM);
                case 0x3a:                /* ':' */
                        return parser_action(parser, raw,
                                             ACTION_FINISH_SUBPARAM);
                case 0x3b:                /* ';' */
                        return parser_action(parser, raw,
                                             ACTION_FINISH_PARAM);
                case 0x3c ... 0x3f:        /* ['<' - '?'] */
                        /* Only accepted as the first character after the
                         * introducer (see STATE_CSI_ENTRY); here the whole
                         * sequence is ignored. */
                        return parser_transition_no_action(parser, raw, STATE_CSI_IGNORE);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_CSI_DISPATCH);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition_no_action(parser, raw, STATE_CSI_IGNORE);
        case STATE_CSI_INT:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_action(parser, raw,
                                             ACTION_COLLECT_CSI);
                case 0x30 ... 0x3f:        /* ['0' - '?'] */
                        /* Parameter characters after an intermediate:
                         * ignore the whole sequence. */
                        return parser_transition_no_action(parser, raw, STATE_CSI_IGNORE);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_CSI_DISPATCH);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition_no_action(parser, raw, STATE_CSI_IGNORE);
        case STATE_CSI_IGNORE:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_EXECUTE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x3f:        /* [' ' - '?'] */
                        return parser_nop(parser, raw);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        /* The final character ends the ignored sequence */
                        return parser_transition_no_action(parser, raw, STATE_GROUND);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_nop(parser, raw);
        case STATE_DCS_ENTRY:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ ESC */
                case 0x1c ... 0x1f:
                        /* Unlike in the CSI states, C0 controls use
                         * ACTION_IGNORE here rather than ACTION_EXECUTE. */
                        return parser_action(parser, raw,
                                             ACTION_IGNORE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_transition(parser, raw, STATE_DCS_INT,
                                                 ACTION_COLLECT_DCS);
                case 0x30 ... 0x39:        /* ['0' - '9'] */
                        return parser_transition(parser, raw, STATE_DCS_PARAM,
                                                 ACTION_PARAM);
                case 0x3a:                /* ':' */
                        return parser_transition(parser, raw, STATE_DCS_PARAM,
                                                 ACTION_FINISH_SUBPARAM);
                case 0x3b:                /* ';' */
                        return parser_transition(parser, raw, STATE_DCS_PARAM,
                                                 ACTION_FINISH_PARAM);
                case 0x3c ... 0x3f:        /* ['<' - '?'] */
                        return parser_transition(parser, raw, STATE_DCS_PARAM,
                                                 ACTION_COLLECT_PARAMETER);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_DCS_PASS,
                                                 ACTION_DCS_CONSUME);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition(parser, raw,
                                         STATE_DCS_PASS, ACTION_DCS_CONSUME);
        case STATE_DCS_PARAM:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_IGNORE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_transition(parser, raw, STATE_DCS_INT,
                                                 ACTION_COLLECT_DCS);
                case 0x30 ... 0x39:        /* ['0' - '9'] */
                        return parser_action(parser, raw,
                                             ACTION_PARAM);
                case 0x3a:                /* ':' */
                        return parser_action(parser, raw,
                                             ACTION_FINISH_SUBPARAM);
                case 0x3b:                /* ';' */
                        return parser_action(parser, raw,
                                             ACTION_FINISH_PARAM);
                case 0x3c ... 0x3f:        /* ['<' - '?'] */
                        return parser_transition_no_action(parser, raw, STATE_DCS_IGNORE);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_DCS_PASS,
                                                 ACTION_DCS_CONSUME);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition(parser, raw,
                                         STATE_DCS_PASS, ACTION_DCS_CONSUME);
        case STATE_DCS_INT:
                switch (raw) {
                case 0x00 ... 0x1a:        /* C0 \ { ESC } */
                case 0x1c ... 0x1f:
                        return parser_action(parser, raw,
                                             ACTION_IGNORE);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x20 ... 0x2f:        /* [' ' - '/'] */
                        return parser_action(parser, raw,
                                             ACTION_COLLECT_DCS);
                case 0x30 ... 0x3f:        /* ['0' - '?'] */
                        return parser_transition_no_action(parser, raw, STATE_DCS_IGNORE);
                case 0x40 ... 0x7e:        /* ['@' - '~'] */
                        return parser_transition(parser, raw, STATE_DCS_PASS,
                                                 ACTION_DCS_CONSUME);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_IGNORE);
                }

                return parser_transition(parser, raw,
                                         STATE_DCS_PASS, ACTION_DCS_CONSUME);
        case STATE_DCS_PASS:
                switch (raw) {
                case 0x00 ... 0x1a:        /* ASCII \ { ESC } */
                case 0x1c ... 0x7f:
                        return parser_action(parser, raw,
                                             ACTION_DCS_COLLECT);
                case 0x1b:                /* ESC */
                        /* May be the first half of a C0 ST; the decision
                         * is deferred to STATE_DCS_PASS_ESC. */
                        return parser_transition_no_action(parser, raw, STATE_DCS_PASS_ESC);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_DCS_DISPATCH);
                }

                return parser_action(parser, raw,
                                     ACTION_DCS_COLLECT);
        case STATE_DCS_IGNORE:
                switch (raw) {
                case 0x00 ... 0x1a:        /* ASCII \ { ESC } */
                case 0x1c ... 0x7f:
                        return parser_nop(parser, raw);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x9c:                /* ST */
                        return parser_transition_no_action(parser, raw, STATE_GROUND);
                }

                return parser_nop(parser, raw);
        case STATE_OSC_STRING:
                switch (raw) {
                case 0x00 ... 0x06:        /* C0 \ { BEL, ESC } */
                case 0x08 ... 0x1a:
                case 0x1c ... 0x1f:
                        return parser_nop(parser, raw);
                case 0x1b:                /* ESC */
                        /* May be the first half of a C0 ST; the decision
                         * is deferred to STATE_OSC_STRING_ESC. */
                        return parser_transition_no_action(parser, raw, STATE_OSC_STRING_ESC);
                case 0x20 ... 0x7f:        /* [' ' - DEL] */
                        return parser_action(parser, raw,
                                             ACTION_OSC_COLLECT);
                case 0x07:                /* BEL */
                case 0x9c:                /* ST */
                        /* Both BEL and ST terminate an OSC string */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_OSC_DISPATCH);
                }

                return parser_action(parser, raw,
                                     ACTION_OSC_COLLECT);
        case STATE_ST_IGNORE:
                switch (raw) {
                case 0x00 ... 0x1a:        /* ASCII \ { ESC } */
                case 0x1c ... 0x7f:
                        return parser_nop(parser, raw);
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw, STATE_ESC,
                                                 ACTION_CLEAR_INT);
                case 0x9c:                /* ST */
                        return parser_transition(parser, raw,
                                                 STATE_GROUND, ACTION_IGNORE);
                }

                return parser_nop(parser, raw);
        case STATE_SCI:
                switch (raw) {
                case 0x1b:                /* ESC */
                        return parser_transition(parser, raw,
                                                 STATE_ESC, ACTION_CLEAR_INT);
                case 0x08 ... 0x0d:        /* BS, HT, LF, VT, FF, CR */
                case 0x20 ... 0x7e:        /* [' ' - '~'] */
                        return parser_transition(parser, raw, STATE_GROUND,
                                                 ACTION_SCI_DISPATCH);
                }

                return parser_transition(parser, raw, STATE_GROUND,
                                         ACTION_IGNORE);
        }

        /* Every state handled above returns, so this point is only
         * reached when parser->state holds a value not handled above. */
        g_assert_not_reached();
        return VTE_SEQ_NONE;
}
1412
1413 int
vte_parser_feed(vte_parser_t * parser,uint32_t raw)1414 vte_parser_feed(vte_parser_t* parser,
1415 uint32_t raw)
1416 {
1417 /*
1418 * Notes:
1419 * * DEC treats GR codes as GL. We don't do that as we require UTF-8
1420 * as charset and, thus, it doesn't make sense to treat GR special.
1421 * * During control sequences, unexpected C1 codes cancel the sequence
1422 * and immediately start a new one. C0 codes, however, may or may not
1423 * be ignored/executed depending on the sequence.
1424 */
1425
1426 switch (raw) {
1427 case 0x18: /* CAN */
1428 return parser_transition(parser, raw,
1429 STATE_GROUND, ACTION_IGNORE);
1430 case 0x1a: /* SUB */
1431 return parser_transition(parser, raw,
1432 STATE_GROUND, ACTION_EXECUTE);
1433 case 0x7f: /* DEL */
1434 return parser_nop(parser, raw);
1435 case 0x80 ... 0x8f: /* C1 \ {DCS, SOS, SCI, CSI, ST, OSC, PM, APC} */
1436 case 0x91 ... 0x97:
1437 case 0x99:
1438 return parser_transition(parser, raw,
1439 STATE_GROUND, ACTION_EXECUTE);
1440 case 0x98: /* SOS */
1441 case 0x9e: /* PM */
1442 case 0x9f: /* APC */
1443 return parser_transition_no_action(parser, raw, STATE_ST_IGNORE);
1444 // FIXMEchpe shouldn't this use ACTION_CLEAR?
1445 case 0x90: /* DCS */
1446 return parser_transition(parser, raw,
1447 STATE_DCS_ENTRY, ACTION_DCS_START);
1448 case 0x9a: /* SCI */
1449 return parser_transition(parser, raw,
1450 STATE_SCI, ACTION_CLEAR);
1451 case 0x9d: /* OSC */
1452 return parser_transition(parser, raw,
1453 STATE_OSC_STRING, ACTION_OSC_START);
1454 case 0x9b: /* CSI */
1455 return parser_transition(parser, raw,
1456 STATE_CSI_ENTRY, ACTION_CLEAR_INT_AND_PARAMS);
1457 default:
1458 return parser_feed_to_state(parser, raw);
1459 }
1460 }
1461
/*
 * vte_parser_reset:
 * @parser: a #vte_parser_t
 *
 * Resets the parser by transitioning it to STATE_GROUND with
 * ACTION_IGNORE, using 0 as the character passed to the transition.
 */
void
vte_parser_reset(vte_parser_t* parser)
{
        parser_transition(parser, 0, STATE_GROUND, ACTION_IGNORE);
}
1467
/*
 * vte_parser_set_dispatch_unripe:
 * @parser: a #vte_parser_t
 * @enable: whether to dispatch unripe DCS sequences
 *
 * Enables or disables dispatch of unripe DCS sequences.
 * If enabled, known DCS sequences with the %VTE_DISPATCH_UNRIPE
 * flag will be dispatched when the Final character is received,
 * instead of when the control string terminator (ST) is received.
 * The application handling the unripe DCS sequence may then
 * either
 * * do nothing; in this case the DCS sequence will be dispatched
 *   again when the control string was fully received. Ripe and
 *   unripe sequences can be distinguished by the value of
 *   parser.seq.st which will be 0 for an unripe sequence and
 *   either 0x5c (C0 ST) or 0x9c (C1 ST) for a ripe sequence; or
 * * call vte_parser_ignore_until_st(); in this case the DCS
 *   sequence will be ignored until after the ST (or another
 *   character that aborts the control string) has been
 *   received; or
 * * switch to a different parser (e.g. DECSIXEL) to parse the
 *   control string directly on-the-fly. Note that in this case,
 *   the subparser should take care to handle C0 and C1 controls
 *   the same way as this parser would.
 */
void
vte_parser_set_dispatch_unripe(vte_parser_t* parser,
                               bool enable)
{
        /* Read by parser_dcs_consume() when deciding whether to dispatch. */
        parser->dispatch_unripe = enable;
}
1499
1500 /*
1501 * vte_parser_ignore_until_st:
1502 * @parser: a #vte_parser_t
1503 *
1504 * When used on an unrip %VTE_SEQ_DCS sequence, makes the
1505 * parser ignore everything until the ST is received (or
1506 * the DCS is aborted by the usual other means).
1507 *
1508 * Note that there is some inconsistencies here:
1509 *
1510 * * SUB aborts the DCS in our parser, but e.g. a DECSIXEL
1511 * parser will handle it as if 3/15 was received.
1512 *
1513 * * the ST terminating the DCS will be dispatched as an ST
1514 * sequence, instead of producing an IGNORE sequence
1515 * (this is easily fixable but would slightly complicate
1516 * the parser for no actual gain).
1517 */
1518 void
vte_parser_ignore_until_st(vte_parser_t * parser)1519 vte_parser_ignore_until_st(vte_parser_t* parser)
1520 {
1521 switch (parser->state) {
1522 case STATE_DCS_PASS:
1523 parser_transition_no_action(parser, 0, STATE_DCS_IGNORE);
1524 break;
1525 default:
1526 g_assert_not_reached();
1527 break;
1528 }
1529 }
1530