1
2 #include "cli.h"
3 #include "errors.h"
4 #include "cleancall.h"
5
6 #include <string.h>
7 #include <stdlib.h>
8
9 /* ---------------------------------------------------------------------- */
10
/* Capacity, in bytes, of the static scratch storage below. */
#define BUFFER_SIZE 4096

/* Scratch storage that every cli_buffer initially points at (see
 * clic__buffer_init()). It is a single file-level array, so all buffers
 * that have not been grown yet share it. */
static char static_buffer[BUFFER_SIZE];

/* A growable byte buffer used to assemble output strings. */
struct cli_buffer {
  char *buf;     /* start of the storage (static_buffer or heap memory) */
  char *ptr;     /* write position: one past the last byte pushed */
  size_t size;   /* capacity of the storage, in bytes */
};
20
/* Forward declarations of the cli_buffer helpers defined below. */

/* Life cycle */
static void clic__buffer_init(struct cli_buffer *buf);
static void clic__buffer_free(struct cli_buffer *buf);
static void clic__buffer_reset(struct cli_buffer *buf);

/* Accessors */
static inline char *clic__buffer_get(struct cli_buffer *buf);
static inline size_t clic__buffer_size(struct cli_buffer *buf);

/* Appending data */
static inline void clic__buffer_push_str(struct cli_buffer *buf,
                                         const char *str);
/* static inline void clic__buffer_push_str_len(struct cli_buffer *buf, */
/*                                              const char *str, */
/*                                              size_t len); */
static inline void clic__buffer_push_piece(struct cli_buffer *buf,
                                           const char *from,
                                           const char *to);

/* Capacity management */
static void clic__buffer_realloc(struct cli_buffer *buf, size_t size);
static void clic__buffer_checklen(struct cli_buffer *buf, size_t len);
36
clic__buffer_init(struct cli_buffer * buf)37 static void clic__buffer_init(struct cli_buffer *buf) {
38 buf->buf = static_buffer;
39 buf->ptr = static_buffer;
40 buf->size = sizeof(static_buffer);
41 }
42
clic__buffer_reset(struct cli_buffer * buf)43 static void clic__buffer_reset(struct cli_buffer *buf) {
44 buf->ptr = buf->buf;
45 }
46
clic__buffer_get(struct cli_buffer * buf)47 static inline char *clic__buffer_get(struct cli_buffer *buf) {
48 return buf->buf;
49 }
50
clic__buffer_size(struct cli_buffer * buf)51 static inline size_t clic__buffer_size(struct cli_buffer *buf) {
52 return buf->ptr - buf->buf;
53 }
54
clic__buffer_free(struct cli_buffer * buf)55 static void clic__buffer_free(struct cli_buffer *buf) {
56 if (buf->buf != static_buffer) free(buf->buf);
57 }
58
clic__buffer_push_str(struct cli_buffer * buf,const char * str)59 static inline void clic__buffer_push_str(struct cli_buffer *buf,
60 const char *str) {
61 size_t len = strlen(str);
62 clic__buffer_checklen(buf, len);
63 strcpy(buf->ptr, str);
64 buf->ptr += len;
65 }
66
67 /* static inline void clic__buffer_push_str_len(struct cli_buffer *buf, */
68 /* const char *str, */
69 /* size_t len) { */
70 /* clic__buffer_checklen(buf, len); */
71 /* memcpy(buf->ptr, str, len); */
72 /* buf->ptr += len; */
73 /* } */
74
clic__buffer_push_piece(struct cli_buffer * buf,const char * from,const char * to)75 static inline void clic__buffer_push_piece(struct cli_buffer *buf,
76 const char *from,
77 const char *to) {
78 size_t len = to - from;
79 clic__buffer_checklen(buf, len);
80 memcpy(buf->ptr, from, len);
81 buf->ptr += len;
82 }
83
clic__buffer_realloc(struct cli_buffer * buf,size_t size)84 static void clic__buffer_realloc(struct cli_buffer *buf, size_t size) {
85 size_t current = buf->ptr - buf->buf;
86 char *old = buf->buf;
87 buf->size = size;
88 if (buf->buf == static_buffer) {
89 buf->buf = malloc(size);
90 if (!buf->buf) R_THROW_SYSTEM_ERROR("ANSI string error");
91 memcpy(buf->buf, old, current);
92 } else {
93 buf->buf = realloc(buf->buf, size);
94 if (!buf->buf) R_THROW_SYSTEM_ERROR("ANSI string error");
95 }
96 buf->ptr = buf->buf + current;
97 }
98
clic__buffer_checklen(struct cli_buffer * buf,size_t len)99 static void clic__buffer_checklen(struct cli_buffer *buf, size_t len) {
100
101 if (buf->ptr + len >= buf->buf + buf->size) {
102 size_t current = buf->ptr - buf->buf;
103 size_t prop = buf->size * 2;
104 if (prop < current + len) prop = current + len;
105 clic__buffer_realloc(buf, prop);
106 }
107 }
108
109 /* ---------------------------------------------------------------------- */
110
/* Special values of cli_color.col, for the extended color forms */
#define CLI_COL_256 254   /* 8 bit color, the index is stored in `r` */
#define CLI_COL_RGB 255   /* 24 bit color, stored in `r`, `g`, `b` */

/* A foreground or background color. */
struct cli_color {
  /* 0 is off
   * 30-37, 40-47, 90-97, 100-107
   * CLI_COL_256 (254) is 8 bit
   * CLI_COL_RGB (255) is 24 bit */
  unsigned char col;
  unsigned char r;
  unsigned char g;
  unsigned char b;
};

/* Non-zero if the two `struct cli_color` objects differ in any byte. */
#define DIFFERENT_COLOR(c1,c2) \
  (memcmp(&(c1), &(c2), sizeof(struct cli_color)))
124
125 struct cli_sgr_state {
126 struct cli_color fg;
127 struct cli_color bg;
128 char bold;
129 char faint;
130 char italic;
131 char underline;
132 char blink;
133 char inverse;
134 char hide;
135 char crossedout;
136 };
137
138 struct cli_ansi_state {
139 struct cli_sgr_state old;
140 struct cli_sgr_state new;
141 char unknown;
142 char off;
143 };
144
/* Read one ";<number>" item at `*ptr`.
 *
 * If `*ptr` does not point at a ';', nothing happens. Otherwise the
 * unsigned decimal number after the ';' is stored in `*num` (which is
 * left alone if there is no number there), and `*ptr` is moved to the
 * next ';' or to the end of the string, whichever comes first. */
static void clic__readnum(char **ptr, unsigned int *num) {
  char *cursor = *ptr;
  int consumed = 0;

  if (*cursor != ';') {
    return;
  }

  cursor++;
  sscanf(cursor, "%u%n", num, &consumed);
  cursor += consumed;

  /* Skip whatever junk is left before the next separator */
  while (*cursor != '\0' && *cursor != ';') {
    cursor++;
  }

  *ptr = cursor;
}
153
clic__parse_color(char ** ptr,const char * end,struct cli_color * col)154 static void clic__parse_color(char **ptr, const char *end, struct cli_color *col) {
155 /* This can be:
156 * - 5;<n>
157 * - 2;<r>;<g>;<b>
158 */
159
160 /* Has to start with ;5; or ;2;, otherwise we skip the whole tag */
161 if ((*ptr)[0] != ';' ||
162 ((*ptr)[1] != '5' && (*ptr)[1] != '2') ||
163 (*ptr)[2] != ';') {
164 *ptr = (char*) end;
165 col->r = col->g = col->b = 0;
166 return;
167 }
168
169 col->col = (*ptr)[1] == '5' ? CLI_COL_256 : CLI_COL_RGB;
170 (*ptr) += 2;
171
172 /* Temporarily create a zero terminated string for sscanf */
173 char backup = *end;
174 char *end2 = (char*) end;
175 *end2 = '\0';
176
177 unsigned int r = 0, g = 0, b = 0;
178
179 clic__readnum(ptr, &r);
180 if (col->col == CLI_COL_RGB) {
181 clic__readnum(ptr, &g);
182 clic__readnum(ptr, &b);
183 }
184
185 col->r = (unsigned char) r;
186 col->g = (unsigned char) g;
187 col->b = (unsigned char) b;
188
189 *end2 = backup;
190 }
191
clic__ansi_update_state(const char * param,const char * intermed,const char * end,struct cli_buffer * buffer,struct cli_ansi_state * state)192 static void clic__ansi_update_state(const char *param,
193 const char *intermed,
194 const char *end,
195 struct cli_buffer *buffer,
196 struct cli_ansi_state *state) {
197
198 char *startptr = (char*) param, *endptr;
199 do {
200 long num = strtol(startptr, &endptr, 10);
201 if (endptr == startptr || num == 0) {
202 memset(&state->new, 0, sizeof(state->new));
203 state->off = 1;
204
205 } else if (num == 1) {
206 state->new.bold = 1;
207
208 } else if (num == 2) {
209 state->new.faint = 1;
210
211 } else if (num == 3) {
212 state->new.italic = 1;
213
214 } else if (num == 4) {
215 state->new.underline = 1;
216
217 } else if (num == 5) {
218 state->new.blink = 1;
219
220 } else if (num == 7) {
221 state->new.inverse = 1;
222
223 } else if (num == 8) {
224 state->new.hide = 1;
225
226 } else if (num == 9) {
227 state->new.crossedout = 1;
228
229 } else if (num == 22) {
230 state->new.bold = state->new.faint = 0;
231
232 } else if (num == 23) {
233 state->new.italic = 0;
234
235 } else if (num == 24) {
236 state->new.underline = 0;
237
238 } else if (num == 25) {
239 state->new.blink = 0;
240
241 } else if (num == 27) {
242 state->new.inverse = 0;
243
244 } else if (num == 28) {
245 state->new.hide = 0;
246
247 } else if (num == 29) {
248 state->new.crossedout = 0;
249
250 } else if ((num >= 30 && num <= 37) || (num >= 90 && num <= 97)) {
251 state->new.fg.col = num;
252
253 } else if (num == 38) {
254 clic__parse_color(&endptr, intermed, &state->new.fg);
255
256 } else if (num == 39) {
257 state->new.fg.col = 0;
258
259 } else if ((num >= 40 && num <= 47) || (num >= 100 && num <= 107)) {
260 state->new.bg.col = num;
261
262 } else if (num == 48) {
263 clic__parse_color(&endptr, intermed, &state->new.bg);
264
265 } else if (num == 49) {
266 state->new.bg.col = 0;
267
268 } else {
269 /* Keep tag as is, and emit it right away */
270 state->unknown = 1;
271 clic__buffer_push_piece(buffer, param - 2, end + 1);
272 }
273
274 /* The next attribute, if any */
275 startptr = endptr + 1;
276 } while (endptr < intermed && *endptr == ';');
277 }
278
/* Both macros append to the `struct cli_buffer *buffer` variable that must
 * be in scope where they are used. */

/* Append an SGR tag, `s` is a string literal with the tag's parameters */
#define EMIT(s)  clic__buffer_push_str(buffer, "\033[" s "m")
/* Append the zero terminated string `s` as is */
#define EMITS(s) clic__buffer_push_str(buffer, (s))
281
clic__state_update_buffer(struct cli_buffer * buffer,struct cli_ansi_state * state)282 static void clic__state_update_buffer(struct cli_buffer *buffer,
283 struct cli_ansi_state *state) {
284
285 char col[20];
286
287 if (state->unknown && state->off) {
288 state->unknown = state->off = 0;
289 EMIT("0");
290 }
291
292 /* Closing tags ------------------------------------------------------ */
293
294 if (state->old.bg.col != 0 && state->new.bg.col != state->old.bg.col) {
295 EMIT("49");
296 }
297
298 if (state->old.fg.col != 0 && state->new.fg.col != state->old.fg.col) {
299 EMIT("39");
300 }
301
302 if (state->new.crossedout < state->old.crossedout) {
303 EMIT("29");
304 }
305
306 if (state->new.hide < state->old.hide) {
307 EMIT("28");
308 }
309
310 if (state->new.inverse < state->old.inverse) {
311 EMIT("27");
312 }
313
314 if (state->new.blink < state->old.blink) {
315 EMIT("25");
316 }
317
318 if (state->new.underline < state->old.underline) {
319 EMIT("24");
320 }
321
322 if (state->new.italic < state->old.italic) {
323 EMIT("23");
324 }
325
326 if (state->new.faint < state->old.faint) {
327 EMIT("22");
328 }
329
330 if (state->new.bold < state->old.bold) {
331 /* TODO: handle bold + faint interaction */
332 EMIT("22");
333 }
334
335 /* Opening tags in reverse order ------------------------------------- */
336
337 if (state->new.bold > state->old.bold) {
338 EMIT("1");
339 }
340
341 if (state->new.faint > state->old.faint) {
342 EMIT("2");
343 }
344
345 if (state->new.italic > state->old.italic) {
346 EMIT("3");
347 }
348
349 if (state->new.underline > state->old.underline) {
350 EMIT("4");
351 }
352
353 if (state->new.blink > state->old.blink) {
354 EMIT("5");
355 }
356
357 if (state->new.inverse > state->old.inverse) {
358 EMIT("7");
359 }
360
361 if (state->new.hide > state->old.hide) {
362 EMIT("8");
363 }
364
365 if (state->new.crossedout > state->old.crossedout) {
366 EMIT("9");
367 }
368
369 if (state->new.fg.col != 0 &&
370 DIFFERENT_COLOR(state->new.fg, state->old.fg)) {
371 if (state->new.fg.col == CLI_COL_256) {
372 snprintf(col, sizeof(col), "\033[38;5;%um", state->new.fg.r);
373 } else if (state->new.fg.col == CLI_COL_RGB) {
374 snprintf(col, sizeof(col), "\033[38;2;%u;%u;%um",
375 state->new.fg.r, state->new.fg.g, state->new.fg.b);
376 } else {
377 snprintf(col, sizeof(col), "\033[%um", state->new.fg.col);
378 }
379 EMITS(col);
380 }
381
382 if (state->new.bg.col != 0 &&
383 DIFFERENT_COLOR(state->new.bg, state->old.bg)) {
384 if (state->new.bg.col == CLI_COL_256) {
385 snprintf(col, sizeof(col), "\033[48;5;%um", state->new.bg.r);
386 } else if (state->new.bg.col == CLI_COL_RGB) {
387 snprintf(col, sizeof(col), "\033[48;2;%u;%u;%um",
388 state->new.bg.r, state->new.bg.g, state->new.bg.b);
389 } else {
390 snprintf(col, sizeof(col), "\033[%um", state->new.bg.col);
391 }
392 EMITS(col);
393 }
394
395 state->old = state->new;
396 }
397
398 typedef int (*clic__start_callback_t)(SEXP rstr,
399 const char *str,
400 void *data);
401 typedef int (*clic__tag_callback_t)(const char *param,
402 const char *intermed,
403 const char *end,
404 void *data);
405 typedef int (*clic__text_callback_t)(const char *str,
406 const char *end,
407 void *data);
408 typedef int (*clic__end_callback_t)(SEXP rstr,
409 const char *str,
410 void *data);
411
/* Go over the elements of the character vector `sx`, split each into
 * control sequences and the text between them, and call the callbacks.
 *
 * For every element: `start_cb` is called first, then `sgr_cb` for every
 * control sequence that ends with 'm', `csi_cb` for every other control
 * sequence, and `text_cb` for every non-empty piece of text in between,
 * in the order they appear in the string. `end_cb` is called last.
 *
 * Any callback may be a null pointer, then it is not called. If a
 * callback returns non-zero, the rest of the element is skipped, but
 * `end_cb` is still called. (The return values of the text callback for
 * the trailing piece of text, and of `end_cb`, are ignored.) NA elements
 * only get the `start_cb` and `end_cb` calls. */
void clic__ansi_iterator(SEXP sx,
                         clic__start_callback_t start_cb,
                         clic__tag_callback_t sgr_cb,
                         clic__tag_callback_t csi_cb,
                         clic__text_callback_t text_cb,
                         clic__end_callback_t end_cb,
                         void *data) {

  const R_xlen_t num = XLENGTH(sx);
  R_xlen_t idx;

  for (idx = 0; idx < num; idx++) {
    SEXP elt = STRING_ELT(sx, idx);
    const char *begin = CHAR(elt);
    /* Scanning position */
    const char *cur = begin;
    /* Start of the text that was not passed to text_cb yet */
    const char *pending = begin;

    if (start_cb && start_cb(elt, begin, data)) goto finish;
    if (elt == NA_STRING) goto finish;

    while (*cur != '\0') {
      if (cur[0] == '\033' && cur[1] == '[') {
        const char *seq = cur;
        const char *params = cur + 2;
        const char *inter = params;
        const char *final;

        /* Parameter bytes, then intermediate bytes, then the final byte */
        while (*inter >= 0x30 && *inter <= 0x3f) inter++;
        final = inter;
        while (*final >= 0x20 && *final <= 0x2f) final++;

        /* The text before the sequence goes first */
        if (text_cb && seq > pending) {
          if (text_cb(pending, seq, data)) goto finish;
        }

        if (*final == 'm') {
          if (sgr_cb && sgr_cb(params, inter, final, data)) goto finish;
        } else {
          if (csi_cb && csi_cb(params, inter, final, data)) goto finish;
        }

        pending = final + 1;
        /* Do not step over the end of a truncated sequence */
        cur = (*final != '\0') ? final + 1 : final;

      } else {
        cur++;
      }

      /* This is not needed, but slightly faster this way */
      while (*cur != '\0' && *cur != '\033') cur++;
    }

    /* Text after the last sequence */
    if (text_cb && cur > pending) text_cb(pending, cur, data);

  finish:
    if (end_cb) end_cb(elt, begin, data);
  }
}
468
469 /* ---------------------------------------------------------------------- */
470
471 struct simplify_data {
472 struct cli_ansi_state state;
473 struct cli_buffer buffer;
474 R_xlen_t done;
475 size_t num_tags;
476 SEXP result;
477 char keep_csi;
478 };
479
/* Start callback of ansi_simplify: empty the output buffer and restart
 * the tag counter for the new string. Never stops the iteration. */
static int simplify_cb_start(SEXP rstr, const char *str, void *vdata) {
  struct simplify_data *sd = (struct simplify_data *) vdata;
  clic__buffer_reset(&sd->buffer);
  sd->num_tags = 0;
  return 0;
}
486
simplify_cb_sgr(const char * param,const char * intermed,const char * end,void * vdata)487 static int simplify_cb_sgr(const char *param,
488 const char *intermed,
489 const char *end,
490 void *vdata) {
491 struct simplify_data *data = vdata;
492 data->num_tags ++;
493 clic__ansi_update_state(param, intermed, end, &data->buffer, &data->state);
494 return 0;
495 }
496
simplify_cb_csi(const char * param,const char * intermed,const char * end,void * vdata)497 static int simplify_cb_csi(const char *param,
498 const char *intermed,
499 const char *end,
500 void *vdata) {
501 struct simplify_data *data = vdata;
502 if (data->keep_csi) {
503 clic__buffer_push_piece(&data->buffer, param - 2, end + 1);
504 } else {
505 /* Need to count, to avoid a verbatim STRSXP copy */
506 data->num_tags ++;
507 }
508 return 0;
509 }
510
simplify_cb_text(const char * str,const char * end,void * vdata)511 static int simplify_cb_text(const char *str,
512 const char *end,
513 void *vdata) {
514 struct simplify_data *data = vdata;
515 clic__state_update_buffer(&data->buffer, &data->state);
516 clic__buffer_push_piece(&data->buffer, str, end);
517 return 0;
518 }
519
/* End callback of ansi_simplify: close everything that is still open,
 * and store the result for the current string. A string without any
 * counted tag is reused as is, instead of creating a copy of it. */
static int simplify_cb_end(SEXP rstr,
                           const char *str,
                           void *vdata) {
  struct simplify_data *sd = (struct simplify_data *) vdata;
  SEXP out = rstr;

  /* Requesting an empty state closes all open tags */
  memset(&sd->state.new, 0, sizeof(struct cli_sgr_state));
  clic__state_update_buffer(&sd->buffer, &sd->state);

  if (sd->num_tags != 0) {
    out = Rf_mkCharLenCE(
      clic__buffer_get(&sd->buffer),
      clic__buffer_size(&sd->buffer),
      CE_UTF8
    );
  }
  SET_STRING_ELT(sd->result, sd->done, out);

  sd->done += 1;
  return 0;
}
544
/* Simplify the ANSI tags of the character vector `sx`.
 *
 * Returns a new character vector of the same length, in which only the
 * SGR tags that are needed to render the text are kept. Non-SGR control
 * sequences are kept if the first element of the logical vector
 * `keep_csi` is true, and dropped otherwise.
 *
 * The class of the result is the class of `sx`, with "ansi_string"
 * prepended and "character" appended, unless `sx` inherits from them
 * already.
 *
 * NOTE(review): the buffer is released with a plain call after the
 * iteration, so it is presumably not released if an R error is thrown
 * during the iteration -- confirm. */
SEXP clic_ansi_simplify(SEXP sx, SEXP keep_csi) {
  struct simplify_data data;
  data.done = 0;
  data.keep_csi = LOGICAL(keep_csi)[0];
  memset(&data.state, 0, sizeof(data.state));
  clic__buffer_init(&data.buffer);
  data.result = PROTECT(allocVector(STRSXP, XLENGTH(sx)));

  clic__ansi_iterator(
    sx,
    simplify_cb_start,
    simplify_cb_sgr,
    simplify_cb_csi,
    simplify_cb_text,
    simplify_cb_end,
    &data
  );

  clic__buffer_free(&data.buffer);

  /* Build the class attribute of the result */
  SEXP ocls = PROTECT(getAttrib(sx, R_ClassSymbol));
  int num_old = isNull(ocls) ? 0 : LENGTH(ocls);
  int add_as = 1, add_ch = 1;
  if (num_old != 0) {
    add_as = !Rf_inherits(sx, "ansi_string");
    add_ch = !Rf_inherits(sx, "character");
  }

  SEXP cls = PROTECT(allocVector(STRSXP, num_old + add_as + add_ch));
  int pos = 0, k;
  if (add_as) SET_STRING_ELT(cls, pos++, mkChar("ansi_string"));
  for (k = 0; k < num_old; k++) {
    SET_STRING_ELT(cls, pos++, STRING_ELT(ocls, k));
  }
  if (add_ch) SET_STRING_ELT(cls, pos++, mkChar("character"));

  setAttrib(data.result, R_ClassSymbol, cls);
  UNPROTECT(3);
  return data.result;
}
580
581 /* ---------------------------------------------------------------------- */
582
583 struct substr_data {
584 struct cli_ansi_state state;
585 struct cli_buffer buffer;
586 R_xlen_t done;
587 SEXP result;
588 int *start;
589 int *stop;
590 int pos;
591 };
592
/* Start callback of ansi_substr: rewind the position counter and the
 * output buffer. NA strings are skipped (non-zero return value). */
static int substr_cb_start(SEXP rstr, const char *str, void *vdata) {
  struct substr_data *sd = (struct substr_data *) vdata;
  clic__buffer_reset(&sd->buffer);
  sd->pos = 1;
  return rstr == NA_STRING;
}
599
substr_cb_sgr(const char * param,const char * intermed,const char * end,void * vdata)600 static int substr_cb_sgr(const char *param,
601 const char *intermed,
602 const char *end,
603 void *vdata) {
604 struct substr_data *data = vdata;
605 clic__ansi_update_state(param, intermed, end, &data->buffer, &data->state);
606 return 0;
607 }
608
substr_cb_text(const char * str,const char * end,void * vdata)609 static int substr_cb_text(const char *str,
610 const char *end,
611 void *vdata) {
612 struct substr_data *data = vdata;
613 int start = data->start[data->done];
614 int stop = data->stop[data->done];
615
616 char *end2 = (char*) end;
617 char oldend = *end2;
618 *end2 = '\0';
619
620 /* Skip before start */
621 struct grapheme_iterator iter;
622 if (data->pos < start) {
623 clic_utf8_graphscan_make(&iter, (const uint8_t*) str, /* width = */ 0);
624 while (data->pos < start && iter.nxt_prop != -1) {
625 clic_utf8_graphscan_next(&iter, NULL, NULL);
626 data->pos++;
627 }
628 str = (const char*) iter.cnd;
629 }
630
631 /* Add before stop */
632 if (data->pos <= stop) {
633 const char *from = str;
634 clic_utf8_graphscan_make(&iter, (const uint8_t*) str, /* width = */ 0);
635 while (data->pos <= stop && iter.nxt_prop != -1) {
636 clic_utf8_graphscan_next(&iter, NULL, NULL);
637 data->pos++;
638 }
639 str = (const char*) iter.cnd;
640 if (from < str) {
641 clic__state_update_buffer(&data->buffer, &data->state);
642 clic__buffer_push_piece(&data->buffer, from, str);
643 }
644 }
645
646 *end2 = oldend;
647
648 /* If we are done, then just close all open tags */
649 if (data->pos > stop) {
650 memset(&data->state.new, 0, sizeof(struct cli_sgr_state));
651 clic__state_update_buffer(&data->buffer, &data->state);
652 return 1;
653 } else {
654 return 0;
655 }
656 }
657
/* End callback of ansi_substr: close everything that is still open, and
 * store the result for the current string. NA stays NA. */
static int substr_cb_end(SEXP rstr,
                         const char *str,
                         void *vdata) {
  struct substr_data *sd = (struct substr_data *) vdata;
  SEXP out = rstr;

  /* Requesting an empty state closes all open tags */
  memset(&sd->state.new, 0, sizeof(struct cli_sgr_state));
  clic__state_update_buffer(&sd->buffer, &sd->state);

  if (rstr != NA_STRING) {
    out = Rf_mkCharLenCE(
      clic__buffer_get(&sd->buffer),
      clic__buffer_size(&sd->buffer),
      CE_UTF8
    );
  }
  SET_STRING_ELT(sd->result, sd->done, out);

  sd->done += 1;
  return 0;
}
681
/* Substrings of the ANSI strings in the character vector `sx`.
 *
 * `start` and `stop` are integer vectors; their i-th elements are used
 * for the i-th string, as the (1-based) first and last positions to
 * keep. They are indexed without a length check, so they presumably
 * must be at least as long as `sx` -- the caller is expected to make
 * sure. SGR tags are kept in a way that the text keeps its style,
 * other control sequences are dropped.
 *
 * The class of the result is the class of `sx`, with "ansi_string"
 * prepended and "character" appended, unless `sx` inherits from them
 * already. */
SEXP clic_ansi_substr(SEXP sx, SEXP start, SEXP stop) {
  struct substr_data data;
  data.done = 0;
  data.start = INTEGER(start);
  data.stop = INTEGER(stop);
  memset(&data.state, 0, sizeof(data.state));
  clic__buffer_init(&data.buffer);
  data.result = PROTECT(allocVector(STRSXP, XLENGTH(sx)));

  clic__ansi_iterator(
    sx,
    substr_cb_start,
    substr_cb_sgr,
    /* csi = */ NULL,
    substr_cb_text,
    substr_cb_end,
    &data
  );

  clic__buffer_free(&data.buffer);

  /* Build the class attribute of the result */
  SEXP ocls = PROTECT(getAttrib(sx, R_ClassSymbol));
  int num_old = isNull(ocls) ? 0 : LENGTH(ocls);
  int add_as = 1, add_ch = 1;
  if (num_old != 0) {
    add_as = !Rf_inherits(sx, "ansi_string");
    add_ch = !Rf_inherits(sx, "character");
  }

  SEXP cls = PROTECT(allocVector(STRSXP, num_old + add_as + add_ch));
  int pos = 0, k;
  if (add_as) SET_STRING_ELT(cls, pos++, mkChar("ansi_string"));
  for (k = 0; k < num_old; k++) {
    SET_STRING_ELT(cls, pos++, STRING_ELT(ocls, k));
  }
  if (add_ch) SET_STRING_ELT(cls, pos++, mkChar("character"));

  setAttrib(data.result, R_ClassSymbol, cls);
  UNPROTECT(3);
  return data.result;
}
718
719 /* ---------------------------------------------------------------------- */
720
721 struct html_data {
722 struct cli_ansi_state state;
723 struct cli_buffer buffer;
724 const char *str;
725 R_xlen_t done;
726 SEXP result;
727 char had_tags;
728 char keep_csi;
729 };
730
/* Append the class name `s` to the output. The first use also opens the
 * <span> tag, and clears the `first` flag. Both `first` and `buffer` must
 * be in scope where the macro is used. */
#define EMITS1(s) do {                          \
    if (first) {                                \
      EMITS("<span class=\"ansi");              \
      first = 0;                                \
    }                                           \
    EMITS(s);                                   \
  } while (0)
737
738
clic__html_start(struct html_data * data)739 static void clic__html_start(struct html_data *data) {
740
741 struct cli_buffer *buffer = &data->buffer;
742 struct cli_ansi_state *state = &data->state;
743
744 char col[64];
745
746 int first = 1;
747 /* Opening tags ------------------------------------------------------ */
748
749 if (state->new.bold > state->old.bold) {
750 EMITS1(" ansi-bold");
751 }
752
753 if (state->new.faint > state->old.faint) {
754 EMITS1(" ansi-faint");
755 }
756
757 if (state->new.italic > state->old.italic) {
758 EMITS1(" ansi-italic");
759 }
760
761 if (state->new.underline > state->old.underline) {
762 EMITS1(" ansi-underline");
763 }
764
765 if (state->new.blink > state->old.blink) {
766 EMITS1(" ansi-blink");
767 }
768
769 if (state->new.inverse > state->old.inverse) {
770 EMITS1(" ansi-inverse");
771 }
772
773 if (state->new.hide > state->old.hide) {
774 EMITS1(" ansi-hide");
775 }
776
777 if (state->new.crossedout > state->old.crossedout) {
778 EMITS1(" ansi-crossedout");
779 }
780
781 if (state->new.fg.col != 0 &&
782 DIFFERENT_COLOR(state->new.fg, state->old.fg)) {
783 if (state->new.fg.col == CLI_COL_256) {
784 snprintf(col, sizeof(col), " ansi-color-%u", state->new.fg.r);
785 } else if (state->new.fg.col == CLI_COL_RGB) {
786 snprintf(col, sizeof(col), " ansi-color-%u-%u-%u",
787 state->new.fg.r, state->new.fg.g, state->new.fg.b);
788 } else {
789 unsigned char ncol = state->new.fg.col - 30;
790 if (ncol > 7) ncol -= 60;
791 snprintf(col, sizeof(col), " ansi-color-%u", ncol);
792 }
793 EMITS1(col);
794 }
795
796 if (state->new.bg.col != 0 &&
797 DIFFERENT_COLOR(state->new.bg, state->old.bg)) {
798 if (state->new.bg.col == CLI_COL_256) {
799 snprintf(col, sizeof(col), " ansi-bg-color-%u",
800 state->new.bg.r);
801 } else if (state->new.bg.col == CLI_COL_RGB) {
802 snprintf(col, sizeof(col), " ansi-bg-color-%u-%u-%u",
803 state->new.bg.r, state->new.bg.g, state->new.bg.b);
804 } else {
805 unsigned char ncol = state->new.bg.col - 40;
806 if (ncol > 7) ncol -= 60;
807 snprintf(col, sizeof(col), " ansi-bg-color-%u", ncol);
808 }
809 EMITS1(col);
810 }
811
812 state->old = state->new;
813
814 if (!first) EMITS("\">");
815 data->had_tags = !first;
816 }
817
clic__html_end(struct html_data * data)818 static void clic__html_end(struct html_data *data) {
819
820 struct cli_buffer *buffer = &data->buffer;
821 if (data->had_tags) EMITS("</span>");
822 }
823
/* Start callback of ansi_html: empty the output buffer. NA strings are
 * skipped (non-zero return value). */
static int html_cb_start(SEXP rstr, const char *str, void *vdata) {
  struct html_data *hd = (struct html_data *) vdata;
  int is_na = (rstr == NA_STRING);
  clic__buffer_reset(&hd->buffer);
  return is_na;
}
829
html_cb_sgr(const char * param,const char * intermed,const char * end,void * vdata)830 static int html_cb_sgr(const char *param,
831 const char *intermed,
832 const char *end,
833 void *vdata) {
834 struct html_data *data = vdata;
835 clic__ansi_update_state(param, intermed, end, &data->buffer, &data->state);
836 return 0;
837 }
838
html_cb_csi(const char * param,const char * intermed,const char * end,void * vdata)839 static int html_cb_csi(const char *param,
840 const char *intermed,
841 const char *end,
842 void *vdata) {
843 struct html_data *data = vdata;
844 if (data->keep_csi) {
845 clic__buffer_push_piece(&data->buffer, param - 2, end + 1);
846 }
847 return 0;
848 }
849
html_cb_text(const char * str,const char * end,void * vdata)850 static int html_cb_text(const char *str,
851 const char *end,
852 void *vdata) {
853 struct html_data *data = vdata;
854 clic__html_start(data);
855 clic__buffer_push_piece(&data->buffer, str, end);
856 clic__html_end(data);
857 return 0;
858 }
859
/* End callback of ansi_html: store the result for the current string
 * (NA stays NA), and start the next string with a clean SGR state.
 *
 * Both the requested and the recorded SGR state are cleared, like at the
 * end of the strings in ansi_simplify and ansi_substr, so that the styles
 * of a string never affect the HTML of the strings after it. */
static int html_cb_end(SEXP rstr,
                       const char *str,
                       void *vdata) {
  struct html_data *data = vdata;
  memset(&data->state.new, 0, sizeof(data->state.new));
  data->state.old = data->state.new;

  if (rstr == NA_STRING) {
    SET_STRING_ELT(data->result, data->done, rstr);
  } else {
    SET_STRING_ELT(
      data->result,
      data->done,
      Rf_mkCharLenCE(
        clic__buffer_get(&data->buffer),
        clic__buffer_size(&data->buffer),
        CE_UTF8
      )
    );
  }

  data->done++;
  return 0;
}
882
/* Convert the ANSI strings in the character vector `sx` to HTML.
 *
 * Returns a character vector of the same length, in which styled text is
 * wrapped into <span> elements that have classes for the styles. Non-SGR
 * control sequences are kept if the first element of the logical vector
 * `keep_csi` is true, and dropped otherwise. The result has no
 * attributes. */
SEXP clic_ansi_html(SEXP sx, SEXP keep_csi) {
  struct html_data data;
  data.done = 0;
  data.keep_csi = LOGICAL(keep_csi)[0];
  memset(&data.state, 0, sizeof(data.state));
  clic__buffer_init(&data.buffer);
  data.result = PROTECT(allocVector(STRSXP, XLENGTH(sx)));

  clic__ansi_iterator(
    sx,
    html_cb_start,
    html_cb_sgr,
    html_cb_csi,
    html_cb_text,
    html_cb_end,
    &data
  );

  clic__buffer_free(&data.buffer);

  UNPROTECT(1);
  return data.result;
}
906
907 /* ---------------------------------------------------------------------- */
908
909 struct has_any_data {
910 R_xlen_t done;
911 SEXP result;
912 char sgr;
913 char csi;
914 char has;
915 };
916
has_any_cb_sgr(const char * param,const char * intermed,const char * end,void * vdata)917 static int has_any_cb_sgr(const char *param,
918 const char *intermed,
919 const char *end,
920 void *vdata) {
921 struct has_any_data *data = vdata;
922 if (data->sgr) {
923 data->has = 1;
924 return 1;
925 } else {
926 return 0;
927 }
928 }
929
has_any_cb_csi(const char * param,const char * intermed,const char * end,void * vdata)930 static int has_any_cb_csi(const char *param,
931 const char *intermed,
932 const char *end,
933 void *vdata) {
934 struct has_any_data *data = vdata;
935 if (data->csi) {
936 data->has = 1;
937 return 1;
938 } else {
939 return 0;
940 }
941 }
942
943
/* End callback of ansi_has_any: store the answer for the current string
 * (NA for NA strings), and clear the match flag for the next one. */
static int has_any_cb_end(SEXP rstr,
                          const char *str,
                          void *vdata) {
  struct has_any_data *hd = (struct has_any_data *) vdata;
  int *answer = LOGICAL(hd->result) + hd->done;

  *answer = (rstr == NA_STRING) ? NA_LOGICAL : hd->has;

  hd->has = 0;
  hd->done += 1;
  return 0;
}
957
/* Which elements of the character vector `sx` have ANSI control
 * sequences?
 *
 * The first elements of the logical vectors `sgr` and `csi` select
 * whether SGR tags and other control sequences are looked for. Returns a
 * logical vector of the same length as `sx`, with NA for NA strings. */
SEXP clic_ansi_has_any(SEXP sx, SEXP sgr, SEXP csi) {
  struct has_any_data data;
  data.sgr = LOGICAL(sgr)[0];
  data.csi = LOGICAL(csi)[0];
  data.has = 0;
  data.done = 0;
  data.result = PROTECT(allocVector(LGLSXP, XLENGTH(sx)));

  clic__ansi_iterator(
    sx,
    /* cb_start = */ 0,
    has_any_cb_sgr,
    has_any_cb_csi,
    /* cb_text = */ 0,
    has_any_cb_end,
    &data
  );

  UNPROTECT(1);
  return data.result;
}
979
980 /* ---------------------------------------------------------------------- */
981
982 struct strip_data {
983 struct cli_buffer buffer;
984 R_xlen_t done;
985 size_t num_tags;
986 SEXP result;
987 char sgr;
988 char csi;
989 };
990
/* Start callback of ansi_strip: empty the output buffer and restart the
 * counter of removed sequences. Never stops the iteration. */
static int strip_cb_start(SEXP rstr, const char *str, void *vdata) {
  struct strip_data *sd = (struct strip_data *) vdata;
  clic__buffer_reset(&sd->buffer);
  sd->num_tags = 0;
  return 0;
}
997
strip_cb_sgr(const char * param,const char * intermed,const char * end,void * vdata)998 static int strip_cb_sgr(const char *param,
999 const char *intermed,
1000 const char *end,
1001 void *vdata) {
1002 struct strip_data *data = vdata;
1003 if (data->sgr) {
1004 data->num_tags ++;
1005 } else {
1006 clic__buffer_push_piece(&data->buffer, param - 2, end + 1);
1007 }
1008 return 0;
1009 }
1010
strip_cb_csi(const char * param,const char * intermed,const char * end,void * vdata)1011 static int strip_cb_csi(const char *param,
1012 const char *intermed,
1013 const char *end,
1014 void *vdata) {
1015 struct strip_data *data = vdata;
1016 if (data->csi) {
1017 data->num_tags ++;
1018 } else {
1019 clic__buffer_push_piece(&data->buffer, param - 2, end + 1);
1020 }
1021 return 0;
1022 }
1023
strip_cb_text(const char * str,const char * end,void * vdata)1024 static int strip_cb_text(const char *str,
1025 const char *end,
1026 void *vdata) {
1027 struct strip_data *data = vdata;
1028 clic__buffer_push_piece(&data->buffer, str, end);
1029 return 0;
1030 }
1031
/* End callback of ansi_strip: store the result for the current string.
 * If nothing was removed from it, then the input string is reused as is,
 * instead of creating a copy of it. */
static int strip_cb_end(SEXP rstr,
                        const char *str,
                        void *vdata) {
  struct strip_data *sd = (struct strip_data *) vdata;
  SEXP out = rstr;

  if (sd->num_tags != 0) {
    out = Rf_mkCharLenCE(
      clic__buffer_get(&sd->buffer),
      clic__buffer_size(&sd->buffer),
      CE_UTF8
    );
  }
  SET_STRING_ELT(sd->result, sd->done, out);

  sd->done += 1;
  return 0;
}
1054
1055 /* TODO: this would benefit from a non-iterator implementation, that
1056 would be much faster. */
1057
1058 /* TODO: strip hyperlinks */
1059
/* Remove ANSI control sequences from the character vector `sx`.
 *
 * The first elements of the logical vectors `sgr` and `csi` select
 * whether SGR tags and other control sequences are removed. Returns a
 * character vector of the same length as `sx`, without attributes. */
SEXP clic_ansi_strip(SEXP sx, SEXP sgr, SEXP csi) {
  struct strip_data data;
  data.sgr = LOGICAL(sgr)[0];
  data.csi = LOGICAL(csi)[0];
  data.done = 0;
  clic__buffer_init(&data.buffer);
  data.result = PROTECT(allocVector(STRSXP, XLENGTH(sx)));

  clic__ansi_iterator(
    sx,
    strip_cb_start,
    strip_cb_sgr,
    strip_cb_csi,
    strip_cb_text,
    strip_cb_end,
    &data
  );

  clic__buffer_free(&data.buffer);

  UNPROTECT(1);
  return data.result;
}
1083
1084 /* ---------------------------------------------------------------------- */
1085
1086 struct nchar_data {
1087 R_xlen_t done;
1088 int *resptr;
1089 int *result;
1090 };
1091
/* Start callback of ansi_nchar: select the output element of the current
 * string, and initialize it. It is NA for an NA string, which is then
 * skipped (non-zero return value), and zero otherwise. */
static int nchar_cb_start(SEXP rstr, const char *str, void *vdata) {
  struct nchar_data *nd = (struct nchar_data *) vdata;
  int is_na = (rstr == NA_STRING);

  nd->resptr = nd->result + nd->done;
  *nd->resptr = is_na ? NA_INTEGER : 0;
  return is_na ? 1 : 0;
}
1103
nchar_cb_text_graphemes(const char * str,const char * end,void * vdata)1104 static int nchar_cb_text_graphemes(const char *str,
1105 const char *end,
1106 void *vdata) {
1107 struct nchar_data *data = vdata;
1108 char *end2 = (char*) end;
1109 char oldend = *end2;
1110 int len = 0;
1111 struct grapheme_iterator iter;
1112
1113 *end2 = '\0';
1114 clic_utf8_graphscan_make(&iter, (const uint8_t*) str, /* width = */ 0);
1115 while (iter.nxt_prop != -1) {
1116 clic_utf8_graphscan_next(&iter, NULL, NULL);
1117 len ++;
1118 }
1119 *data->resptr += len;
1120
1121 *end2 = oldend;
1122 return 0;
1123 }
1124
nchar_cb_text_bytes(const char * str,const char * end,void * vdata)1125 static int nchar_cb_text_bytes(const char *str,
1126 const char *end,
1127 void *vdata) {
1128 struct nchar_data *data = vdata;
1129 *data->resptr += (end - str);
1130 return 0;
1131 }
1132
nchar_cb_text_width(const char * str,const char * end,void * vdata)1133 static int nchar_cb_text_width(const char *str,
1134 const char *end,
1135 void *vdata) {
1136 struct nchar_data *data = vdata;
1137 char *end2 = (char*) end;
1138 char oldend = *end2;
1139 int len = 0, width;
1140 struct grapheme_iterator iter;
1141
1142 *end2 = '\0';
1143 clic_utf8_graphscan_make(&iter, (const uint8_t*) str, /* width = */ 1);
1144 while (iter.nxt_prop != -1) {
1145 clic_utf8_graphscan_next(&iter, NULL, &width);
1146 len += width;
1147 }
1148 *data->resptr += len;
1149
1150 *end2 = oldend;
1151 return 0;
1152 }
1153
nchar_cb_text_codepoints(const char * str,const char * end,void * vdata)1154 static int nchar_cb_text_codepoints(const char *str,
1155 const char *end,
1156 void *vdata) {
1157 struct nchar_data *data = vdata;
1158
1159 while (str < end) {
1160 int len = UTF8LITE_UTF8_TOTAL_LEN(*str);
1161 str += len;
1162 *data->resptr += 1;
1163 }
1164
1165 return 0;
1166 }
1167
/* End callback of ansi_nchar: the current string is done, move on to the
 * next output element. */
static int nchar_cb_end(SEXP rstr,
                        const char *str,
                        void *vdata) {
  struct nchar_data *nd = (struct nchar_data *) vdata;
  nd->done += 1;
  return 0;
}
1175
1176 static clic__text_callback_t nchar_text_cbs[] = {
1177 nchar_cb_text_graphemes,
1178 nchar_cb_text_bytes,
1179 nchar_cb_text_width,
1180 nchar_cb_text_codepoints
1181 };
1182
1183 /* TODO: this would benefit from a non-iterator implementation, that
1184 would be much faster. */
1185
/* The lengths of the ANSI strings in the character vector `sx`, not
 * counting the control sequences.
 *
 * The first element of the integer vector `type` selects the unit, it is
 * the 1-based index of a callback in `nchar_text_cbs`: 1 is graphemes,
 * 2 is bytes, 3 is display width, 4 is code points. Any other value
 * (including NA) is an error; it is checked before it is used to index
 * the callback table, so that a bad value cannot make us call through a
 * pointer read from outside of the table.
 *
 * Returns an integer vector of the same length as `sx`, with NA for NA
 * strings. */
SEXP clic_ansi_nchar(SEXP sx, SEXP type) {
  const int num_types =
    (int) (sizeof(nchar_text_cbs) / sizeof(nchar_text_cbs[0]));
  const int rtype = INTEGER(type)[0];
  if (rtype < 1 || rtype > num_types) {
    Rf_error("Invalid `type` in ANSI nchar: %d, must be between 1 and %d",
             rtype, num_types);
  }
  const int ctype = rtype - 1;

  struct nchar_data data;
  data.done = 0;
  SEXP result = PROTECT(allocVector(INTSXP, XLENGTH(sx)));
  data.result = INTEGER(result);

  clic__ansi_iterator(
    sx,
    nchar_cb_start,
    /* sgr = */ NULL,
    /* csi = */ NULL,
    nchar_text_cbs[ctype],
    nchar_cb_end,
    &data
  );

  UNPROTECT(1);
  return result;
}
1206