1 
2 #include "cli.h"
3 #include "errors.h"
4 #include "cleancall.h"
5 
6 #include <string.h>
7 #include <stdlib.h>
8 
9 /* ---------------------------------------------------------------------- */
10 
11 #define BUFFER_SIZE 4096
12 
13 static char static_buffer[BUFFER_SIZE];
14 
/* Growable byte buffer used to assemble output strings.
 * clic__buffer_init() points it at the file-level static_buffer;
 * clic__buffer_realloc() moves it to heap memory when it has to grow. */
struct cli_buffer {
  char *buf;    /* start of the storage (static_buffer or malloc'd memory) */
  char *ptr;    /* write position: where the next byte will be stored */
  size_t size;  /* capacity of the storage, in bytes */
};
20 
21 static void clic__buffer_init(struct cli_buffer *buf);
22 static void clic__buffer_free(struct cli_buffer *buf);
23 static void clic__buffer_reset(struct cli_buffer *buf);
24 static inline char *clic__buffer_get(struct cli_buffer *buf);
25 static inline size_t clic__buffer_size(struct cli_buffer *buf);
26 static inline void clic__buffer_push_str(struct cli_buffer *buf,
27                                          const char *str);
28 /* static inline void clic__buffer_push_str_len(struct cli_buffer *buf, */
29 /*                                              const char *str, */
30 /*                                              size_t len); */
31 static inline void clic__buffer_push_piece(struct cli_buffer *buf,
32                                            const char *from,
33                                            const char *to);
34 static void clic__buffer_realloc(struct cli_buffer *buf, size_t size);
35 static void clic__buffer_checklen(struct cli_buffer *buf, size_t len);
36 
clic__buffer_init(struct cli_buffer * buf)37 static void clic__buffer_init(struct cli_buffer *buf) {
38   buf->buf = static_buffer;
39   buf->ptr = static_buffer;
40   buf->size = sizeof(static_buffer);
41 }
42 
clic__buffer_reset(struct cli_buffer * buf)43 static void clic__buffer_reset(struct cli_buffer *buf) {
44   buf->ptr = buf->buf;
45 }
46 
clic__buffer_get(struct cli_buffer * buf)47 static inline char *clic__buffer_get(struct cli_buffer *buf) {
48   return buf->buf;
49 }
50 
clic__buffer_size(struct cli_buffer * buf)51 static inline size_t clic__buffer_size(struct cli_buffer *buf) {
52   return buf->ptr - buf->buf;
53 }
54 
clic__buffer_free(struct cli_buffer * buf)55 static void clic__buffer_free(struct cli_buffer *buf) {
56   if (buf->buf != static_buffer) free(buf->buf);
57 }
58 
clic__buffer_push_str(struct cli_buffer * buf,const char * str)59 static inline void clic__buffer_push_str(struct cli_buffer *buf,
60                                          const char *str) {
61   size_t len = strlen(str);
62   clic__buffer_checklen(buf, len);
63   strcpy(buf->ptr, str);
64   buf->ptr += len;
65 }
66 
67 /* static inline void clic__buffer_push_str_len(struct cli_buffer *buf, */
68 /*                                              const char *str, */
69 /*                                              size_t len) { */
70 /*   clic__buffer_checklen(buf, len); */
71 /*   memcpy(buf->ptr, str, len); */
72 /*   buf->ptr += len; */
73 /* } */
74 
clic__buffer_push_piece(struct cli_buffer * buf,const char * from,const char * to)75 static inline void clic__buffer_push_piece(struct cli_buffer *buf,
76                                            const char *from,
77                                            const char *to) {
78   size_t len = to - from;
79   clic__buffer_checklen(buf, len);
80   memcpy(buf->ptr, from, len);
81   buf->ptr += len;
82 }
83 
clic__buffer_realloc(struct cli_buffer * buf,size_t size)84 static void clic__buffer_realloc(struct cli_buffer *buf, size_t size) {
85   size_t current = buf->ptr - buf->buf;
86   char *old = buf->buf;
87   buf->size = size;
88   if (buf->buf == static_buffer) {
89     buf->buf = malloc(size);
90     if (!buf->buf) R_THROW_SYSTEM_ERROR("ANSI string error");
91     memcpy(buf->buf, old, current);
92   } else {
93     buf->buf = realloc(buf->buf, size);
94     if (!buf->buf) R_THROW_SYSTEM_ERROR("ANSI string error");
95   }
96   buf->ptr = buf->buf + current;
97 }
98 
clic__buffer_checklen(struct cli_buffer * buf,size_t len)99  static void clic__buffer_checklen(struct cli_buffer *buf, size_t len) {
100 
101   if (buf->ptr + len >= buf->buf + buf->size) {
102     size_t current = buf->ptr - buf->buf;
103     size_t prop = buf->size * 2;
104     if (prop < current + len) prop = current + len;
105     clic__buffer_realloc(buf, prop);
106   }
107 }
108 
109 /* ---------------------------------------------------------------------- */
110 
111 #define CLI_COL_256 254
112 #define CLI_COL_RGB 255
113 
/* A foreground or background color, as selected by SGR parameters.
 * Colors are compared bytewise with DIFFERENT_COLOR(), so all four
 * members take part in the comparison. */
struct cli_color {
  /* 0 is off
   * 30-37, 40-47, 90-97, 100-107
   * CLI_COL_256 (254) is 8 bit
   * CLI_COL_RGB (255) is 24 bit */
  unsigned char col;
  /* For CLI_COL_256 the palette index is stored in `r`; for CLI_COL_RGB
   * the three members are the red, green and blue components. */
  unsigned char r, g, b;
};
122 
123 #define DIFFERENT_COLOR(c1,c2) memcmp(&(c1), &(c2), sizeof(struct cli_color))
124 
/* The complete set of SGR attributes tracked by this file: the two colors
 * and one on/off flag (1 or 0) per text attribute. An all-zero struct
 * means that no attribute is active. */
struct cli_sgr_state {
  struct cli_color fg;   /* foreground color, SGR 30-39, 90-97 */
  struct cli_color bg;   /* background color, SGR 40-49, 100-107 */
  char bold;             /* SGR 1, switched off by 22 */
  char faint;            /* SGR 2, switched off by 22 */
  char italic;           /* SGR 3, switched off by 23 */
  char underline;        /* SGR 4, switched off by 24 */
  char blink;            /* SGR 5, switched off by 25 */
  char inverse;          /* SGR 7, switched off by 27 */
  char hide;             /* SGR 8, switched off by 28 */
  char crossedout;       /* SGR 9, switched off by 29 */
};
137 
/* Bookkeeping for turning a stream of SGR tags into a minimal set of
 * tags in the output. */
struct cli_ansi_state {
  /* The state that was last written out; clic__state_update_buffer()
   * copies `new` here after emitting the difference. */
  struct cli_sgr_state old;
  /* The state requested by the tags parsed so far. */
  struct cli_sgr_state new;
  /* Set to 1 when an SGR parameter was not recognized and the tag was
   * copied to the output verbatim. */
  char unknown;
  /* Set to 1 when a reset (SGR 0 or an empty parameter) was parsed. */
  char off;
};
144 
/* Read one `;<number>` field at `*ptr`.
 *
 * If `*ptr` does not point at a semicolon, nothing happens. Otherwise the
 * semicolon is skipped, an unsigned number is scanned into `*num` (which
 * is left alone if no number can be scanned), and `*ptr` is moved to the
 * next semicolon or to the end of the string, whichever comes first. */
static void clic__readnum(char **ptr, unsigned int *num) {
  char *cur = *ptr;
  int consumed = 0;

  if (*cur != ';') return;
  cur++;

  sscanf(cur, "%u%n", num, &consumed);
  cur += consumed;

  /* Skip any junk up to the next field separator */
  while (*cur != '\0' && *cur != ';') cur++;

  *ptr = cur;
}
153 
clic__parse_color(char ** ptr,const char * end,struct cli_color * col)154 static void clic__parse_color(char **ptr, const char *end, struct cli_color *col) {
155   /* This can be:
156    * - 5;<n>
157    * - 2;<r>;<g>;<b>
158    */
159 
160   /* Has to start with ;5; or ;2;, otherwise we skip the whole tag */
161   if ((*ptr)[0] != ';' ||
162       ((*ptr)[1] != '5' && (*ptr)[1] != '2') ||
163       (*ptr)[2] != ';') {
164     *ptr = (char*) end;
165     col->r = col->g = col->b = 0;
166     return;
167   }
168 
169   col->col = (*ptr)[1] == '5' ? CLI_COL_256 : CLI_COL_RGB;
170   (*ptr) += 2;
171 
172   /* Temporarily create a zero terminated string for sscanf */
173   char backup = *end;
174   char *end2 = (char*) end;
175   *end2 = '\0';
176 
177   unsigned int r = 0, g = 0, b = 0;
178 
179   clic__readnum(ptr, &r);
180   if (col->col == CLI_COL_RGB) {
181     clic__readnum(ptr, &g);
182     clic__readnum(ptr, &b);
183   }
184 
185   col->r = (unsigned char) r;
186   col->g = (unsigned char) g;
187   col->b = (unsigned char) b;
188 
189   *end2 = backup;
190 }
191 
clic__ansi_update_state(const char * param,const char * intermed,const char * end,struct cli_buffer * buffer,struct cli_ansi_state * state)192 static void clic__ansi_update_state(const char *param,
193                                     const char *intermed,
194                                     const char *end,
195                                     struct cli_buffer *buffer,
196                                     struct cli_ansi_state *state) {
197 
198   char *startptr = (char*) param, *endptr;
199   do {
200     long num = strtol(startptr, &endptr, 10);
201     if (endptr == startptr || num == 0) {
202       memset(&state->new, 0, sizeof(state->new));
203       state->off = 1;
204 
205     } else if (num == 1) {
206       state->new.bold = 1;
207 
208     } else if (num == 2) {
209       state->new.faint = 1;
210 
211     } else if (num == 3) {
212       state->new.italic = 1;
213 
214     } else if (num == 4) {
215       state->new.underline = 1;
216 
217     } else if (num == 5) {
218       state->new.blink = 1;
219 
220     } else if (num == 7) {
221       state->new.inverse = 1;
222 
223     } else if (num == 8) {
224       state->new.hide = 1;
225 
226     } else if (num == 9) {
227       state->new.crossedout = 1;
228 
229     } else if (num == 22) {
230       state->new.bold = state->new.faint = 0;
231 
232     } else if (num == 23) {
233       state->new.italic = 0;
234 
235     } else if (num == 24) {
236       state->new.underline = 0;
237 
238     } else if (num == 25) {
239       state->new.blink = 0;
240 
241     } else if (num == 27) {
242       state->new.inverse = 0;
243 
244     } else if (num == 28) {
245       state->new.hide = 0;
246 
247     } else if (num == 29) {
248       state->new.crossedout = 0;
249 
250     } else if ((num >= 30 && num <= 37) || (num >= 90 && num <= 97)) {
251       state->new.fg.col = num;
252 
253     } else if (num == 38) {
254       clic__parse_color(&endptr, intermed, &state->new.fg);
255 
256     } else if (num == 39) {
257       state->new.fg.col = 0;
258 
259     } else if ((num >= 40 && num <= 47) || (num >= 100 && num <= 107)) {
260       state->new.bg.col = num;
261 
262     } else if (num == 48) {
263       clic__parse_color(&endptr, intermed, &state->new.bg);
264 
265     } else if (num == 49) {
266       state->new.bg.col = 0;
267 
268     } else {
269       /* Keep tag as is, and emit it right away */
270       state->unknown = 1;
271       clic__buffer_push_piece(buffer, param - 2, end + 1);
272     }
273 
274     /* The next attribute, if any */
275     startptr = endptr + 1;
276   } while (endptr < intermed && *endptr == ';');
277 }
278 
279 #define EMIT(s) clic__buffer_push_str(buffer, "\033[" s "m")
280 #define EMITS(s) clic__buffer_push_str(buffer, (s))
281 
clic__state_update_buffer(struct cli_buffer * buffer,struct cli_ansi_state * state)282 static void clic__state_update_buffer(struct cli_buffer *buffer,
283                                       struct cli_ansi_state *state) {
284 
285   char col[20];
286 
287   if (state->unknown && state->off) {
288     state->unknown = state->off = 0;
289     EMIT("0");
290   }
291 
292   /* Closing tags ------------------------------------------------------ */
293 
294   if (state->old.bg.col != 0 && state->new.bg.col != state->old.bg.col) {
295     EMIT("49");
296   }
297 
298   if (state->old.fg.col != 0 && state->new.fg.col != state->old.fg.col) {
299     EMIT("39");
300   }
301 
302   if (state->new.crossedout < state->old.crossedout) {
303     EMIT("29");
304   }
305 
306   if (state->new.hide < state->old.hide) {
307     EMIT("28");
308   }
309 
310   if (state->new.inverse < state->old.inverse) {
311     EMIT("27");
312   }
313 
314   if (state->new.blink < state->old.blink) {
315     EMIT("25");
316   }
317 
318   if (state->new.underline < state->old.underline) {
319     EMIT("24");
320   }
321 
322   if (state->new.italic < state->old.italic) {
323     EMIT("23");
324   }
325 
326   if (state->new.faint < state->old.faint) {
327     EMIT("22");
328   }
329 
330   if (state->new.bold < state->old.bold) {
331     /* TODO: handle bold + faint interaction */
332     EMIT("22");
333   }
334 
335   /* Opening tags in reverse order ------------------------------------- */
336 
337   if (state->new.bold > state->old.bold) {
338     EMIT("1");
339   }
340 
341   if (state->new.faint > state->old.faint) {
342     EMIT("2");
343   }
344 
345   if (state->new.italic > state->old.italic) {
346     EMIT("3");
347   }
348 
349   if (state->new.underline > state->old.underline) {
350     EMIT("4");
351   }
352 
353   if (state->new.blink > state->old.blink) {
354     EMIT("5");
355   }
356 
357   if (state->new.inverse > state->old.inverse) {
358     EMIT("7");
359   }
360 
361   if (state->new.hide > state->old.hide) {
362     EMIT("8");
363   }
364 
365   if (state->new.crossedout > state->old.crossedout) {
366     EMIT("9");
367   }
368 
369   if (state->new.fg.col != 0 &&
370       DIFFERENT_COLOR(state->new.fg, state->old.fg)) {
371     if (state->new.fg.col == CLI_COL_256) {
372       snprintf(col, sizeof(col), "\033[38;5;%um", state->new.fg.r);
373     } else if (state->new.fg.col == CLI_COL_RGB) {
374       snprintf(col, sizeof(col), "\033[38;2;%u;%u;%um",
375                state->new.fg.r, state->new.fg.g, state->new.fg.b);
376     } else {
377       snprintf(col, sizeof(col), "\033[%um", state->new.fg.col);
378     }
379     EMITS(col);
380   }
381 
382   if (state->new.bg.col != 0 &&
383       DIFFERENT_COLOR(state->new.bg, state->old.bg)) {
384     if (state->new.bg.col == CLI_COL_256) {
385       snprintf(col, sizeof(col), "\033[48;5;%um", state->new.bg.r);
386     } else if (state->new.bg.col == CLI_COL_RGB) {
387       snprintf(col, sizeof(col), "\033[48;2;%u;%u;%um",
388                state->new.bg.r, state->new.bg.g, state->new.bg.b);
389     } else {
390       snprintf(col, sizeof(col), "\033[%um", state->new.bg.col);
391     }
392     EMITS(col);
393   }
394 
395   state->old = state->new;
396 }
397 
/* Callback types for clic__ansi_iterator(). `data` is the pointer that
 * was passed to the iterator. Inside the iterator a non-zero return value
 * from the start, tag or text callback stops the processing of the
 * current string and jumps straight to the end callback. */

/* Called first for every string: `rstr` is the CHARSXP, `str` its bytes. */
typedef int (*clic__start_callback_t)(SEXP rstr,
                                      const char *str,
                                      void *data);
/* Called for a control sequence: `param` points right after "ESC [",
 * `intermed` to the end of the parameter bytes, `end` to the final byte. */
typedef int (*clic__tag_callback_t)(const char *param,
                                   const char *intermed,
                                   const char *end,
                                   void *data);
/* Called for a piece of text between tags, the range is [str, end). */
typedef int (*clic__text_callback_t)(const char *str,
                                      const char *end,
                                      void *data);
/* Called last for every string; the iterator ignores its return value. */
typedef int (*clic__end_callback_t)(SEXP rstr,
                                    const char *str,
                                    void *data);
411 
/* Split every string of the character vector `sx` into text pieces and
 * control sequences, and report them through callbacks.
 *
 * For each element, in this order:
 * - start_cb is called; if it returns non-zero, or the element is NA,
 *   then the element is not scanned at all,
 * - text_cb is called for each non-empty run of bytes between control
 *   sequences, sgr_cb for each "ESC [ ... m" sequence, and csi_cb for
 *   every other "ESC [ ..." sequence; a non-zero return value stops
 *   the scan of the element (the return value of the call for the text
 *   after the last sequence is ignored),
 * - end_cb is always called.
 * Any callback may be NULL, and `data` is passed on to all of them.
 *
 * A control sequence is "ESC [", then bytes in 0x30-0x3f (parameters),
 * then bytes in 0x20-0x2f (intermediates), then a single final byte.
 * If the string ends before the final byte, then csi_cb is called with
 * `end` pointing to the last byte of the string instead of its
 * terminator (so `end` can be before `intermed` in this case). This way
 * callbacks that copy the tag as the range [param - 2, end + 1) do not
 * copy the terminating zero into their output. */
void clic__ansi_iterator(SEXP sx,
                         clic__start_callback_t start_cb,
                         clic__tag_callback_t sgr_cb,
                         clic__tag_callback_t csi_cb,
                         clic__text_callback_t text_cb,
                         clic__end_callback_t end_cb,
                         void *data) {

  R_xlen_t i, len = XLENGTH(sx);
  for (i = 0; i < len; i++) {
    SEXP str = STRING_ELT(sx, i);
    const char *ox = CHAR(str);
    const char *x = ox;
    /* Start of the text that was not reported yet */
    const char *shaft = ox;
    const char *s_start;
    const char *s_param;
    const char *s_intermed;
    const char *s_end;

    if (start_cb) if (start_cb(str, ox, data)) goto end;
    if (str == NA_STRING) goto end;

    while (*x != 0) {
      if (*x == '\033' && *(x + 1) == '[') {
        s_start = x;
        s_param = s_intermed = x + 2;
        while (*s_intermed >= 0x30 && *s_intermed <= 0x3f) s_intermed++;
        s_end = s_intermed;
        while (*s_end >= 0x20 && *s_end <= 0x2f) s_end++;

        /* Report the text before the tag first */
        if (s_start > shaft && text_cb) {
          if (text_cb(shaft, s_start, data)) goto end;
        }

        if (*s_end == 'm') {
          if (sgr_cb) {
            if (sgr_cb(s_param, s_intermed, s_end, data)) goto end;
          }
        } else {
          if (csi_cb) {
            /* Truncated sequence: do not hand out the terminator as the
               final byte. s_end - 1 is at worst the '[' itself. */
            const char *last = *s_end ? s_end : s_end - 1;
            if (csi_cb(s_param, s_intermed, last, data)) goto end;
          }
        }
        shaft = s_end + 1;
        /* Never step over the end of the string */
        x = *s_end ? s_end + 1 : s_end;
      } else {
        x++;
      }
      /* This is not needed, but slightly faster this way */
      while (*x != '\033' && *x != '\0') x++;
    }

    /* The text after the last tag */
    if (x > shaft && text_cb) text_cb(shaft, x, data);

  end:
    if (end_cb) end_cb(str, ox, data);
  }
}
468 
469 /* ---------------------------------------------------------------------- */
470 
/* Data shared by the callbacks of clic_ansi_simplify(). */
struct simplify_data {
  struct cli_ansi_state state;  /* SGR state tracked while scanning */
  struct cli_buffer buffer;     /* the simplified string being built */
  R_xlen_t done;                /* index of the result element to fill next */
  size_t num_tags;              /* number of tags counted in the current
                                   string; if zero, the input CHARSXP is
                                   reused as the result */
  SEXP result;                  /* output character vector */
  char keep_csi;                /* whether to copy non-SGR sequences */
};
479 
/* Start callback of clic_ansi_simplify(): empty the output buffer and
 * restart the tag counter for the new string. Always continues. */
static int simplify_cb_start(SEXP rstr, const char *str, void *vdata) {
  struct simplify_data *sd = (struct simplify_data *) vdata;
  clic__buffer_reset(&sd->buffer);
  sd->num_tags = 0;
  return 0;
}
486 
simplify_cb_sgr(const char * param,const char * intermed,const char * end,void * vdata)487 static int simplify_cb_sgr(const char *param,
488                            const char *intermed,
489                            const char *end,
490                            void *vdata) {
491   struct simplify_data *data = vdata;
492   data->num_tags ++;
493   clic__ansi_update_state(param, intermed, end, &data->buffer, &data->state);
494   return 0;
495 }
496 
simplify_cb_csi(const char * param,const char * intermed,const char * end,void * vdata)497 static int simplify_cb_csi(const char *param,
498                            const char *intermed,
499                            const char *end,
500                            void *vdata) {
501   struct simplify_data *data = vdata;
502   if (data->keep_csi) {
503     clic__buffer_push_piece(&data->buffer, param - 2, end + 1);
504   } else {
505     /* Need to count, to avoid a verbatim STRSXP copy */
506     data->num_tags ++;
507   }
508   return 0;
509 }
510 
simplify_cb_text(const char * str,const char * end,void * vdata)511 static int simplify_cb_text(const char *str,
512                             const char *end,
513                             void *vdata) {
514   struct simplify_data *data = vdata;
515   clic__state_update_buffer(&data->buffer, &data->state);
516   clic__buffer_push_piece(&data->buffer, str, end);
517   return 0;
518 }
519 
/* End callback of clic_ansi_simplify(): close every attribute that is
 * still open, then store the result element. A string in which no tag
 * was counted is stored as the original CHARSXP, otherwise a new UTF-8
 * CHARSXP is created from the buffer. */
static int simplify_cb_end(SEXP rstr,
                           const char *str,
                           void *vdata) {
  struct simplify_data *sd = (struct simplify_data *) vdata;

  /* Requesting the all-off state emits the closing tags */
  memset(&sd->state.new, 0, sizeof(struct cli_sgr_state));
  clic__state_update_buffer(&sd->buffer, &sd->state);

  SEXP elt = rstr;
  if (sd->num_tags != 0) {
    elt = Rf_mkCharLenCE(
      clic__buffer_get(&sd->buffer),
      clic__buffer_size(&sd->buffer),
      CE_UTF8
    );
  }
  SET_STRING_ELT(sd->result, sd->done, elt);

  sd->done += 1;
  return 0;
}
544 
/* Simplify the ANSI tags of the character vector `sx`.
 *
 * `keep_csi` is a logical scalar: whether to keep the control sequences
 * that are not SGR tags. Returns a new character vector of the same
 * length. Its class is the class of `sx`, with "ansi_string" added to
 * the front and "character" added to the end, unless `sx` already
 * inherits from them. */
SEXP clic_ansi_simplify(SEXP sx, SEXP keep_csi) {
  struct simplify_data data;
  memset(&data.state, 0, sizeof(data.state));
  clic__buffer_init(&data.buffer);
  data.done = 0;
  data.result = PROTECT(allocVector(STRSXP, XLENGTH(sx)));
  data.keep_csi = LOGICAL(keep_csi)[0];

  clic__ansi_iterator(sx, simplify_cb_start, simplify_cb_sgr,
                      simplify_cb_csi, simplify_cb_text, simplify_cb_end,
                      &data);

  clic__buffer_free(&data.buffer);

  /* Build the class attribute of the result */
  SEXP ocls = PROTECT(getAttrib(sx, R_ClassSymbol));
  int oclslen = isNull(ocls) ? 0 : LENGTH(ocls);
  int has_as = 0, has_ch = 0;
  if (oclslen != 0) {
    has_as = Rf_inherits(sx, "ansi_string");
    has_ch = Rf_inherits(sx, "character");
  }

  int clslen = oclslen + !has_as + !has_ch;
  SEXP cls = PROTECT(allocVector(STRSXP, clslen));
  int pos = 0;
  if (!has_as) SET_STRING_ELT(cls, pos++, mkChar("ansi_string"));
  for (int k = 0; k < oclslen; k++) {
    SET_STRING_ELT(cls, pos++, STRING_ELT(ocls, k));
  }
  if (!has_ch) SET_STRING_ELT(cls, pos++, mkChar("character"));

  setAttrib(data.result, R_ClassSymbol, cls);
  UNPROTECT(3);
  return data.result;
}
580 
581 /* ---------------------------------------------------------------------- */
582 
/* Data shared by the callbacks of clic_ansi_substr(). */
struct substr_data {
  struct cli_ansi_state state;  /* SGR state tracked while scanning */
  struct cli_buffer buffer;     /* the substring being built */
  R_xlen_t done;                /* index of the element being processed,
                                   also indexes `start` and `stop` */
  SEXP result;                  /* output character vector */
  int *start;                   /* first position to keep, per element */
  int *stop;                    /* last position to keep, per element */
  int pos;                      /* position of the next unit of text in the
                                   current string, counted from 1; advanced
                                   once per clic_utf8_graphscan_next() call */
};
592 
/* Start callback of clic_ansi_substr(): rewind the position counter and
 * the output buffer. Returns non-zero for NA, to skip the scan. */
static int substr_cb_start(SEXP rstr, const char *str, void *vdata) {
  struct substr_data *sd = (struct substr_data *) vdata;
  clic__buffer_reset(&sd->buffer);
  sd->pos = 1;
  return rstr == NA_STRING;
}
599 
substr_cb_sgr(const char * param,const char * intermed,const char * end,void * vdata)600 static int substr_cb_sgr(const char *param,
601                          const char *intermed,
602                          const char *end,
603                          void *vdata) {
604   struct substr_data *data = vdata;
605   clic__ansi_update_state(param, intermed, end, &data->buffer, &data->state);
606   return 0;
607 }
608 
substr_cb_text(const char * str,const char * end,void * vdata)609 static int substr_cb_text(const char *str,
610                           const char *end,
611                           void *vdata) {
612   struct substr_data *data = vdata;
613   int start = data->start[data->done];
614   int stop = data->stop[data->done];
615 
616   char *end2 = (char*) end;
617   char oldend = *end2;
618   *end2 = '\0';
619 
620   /* Skip before start */
621   struct grapheme_iterator iter;
622   if (data->pos < start) {
623     clic_utf8_graphscan_make(&iter, (const uint8_t*) str, /* width = */ 0);
624     while (data->pos < start && iter.nxt_prop != -1) {
625       clic_utf8_graphscan_next(&iter, NULL,  NULL);
626       data->pos++;
627     }
628     str = (const char*) iter.cnd;
629   }
630 
631   /* Add before stop */
632   if (data->pos <= stop) {
633     const char *from = str;
634     clic_utf8_graphscan_make(&iter, (const uint8_t*) str, /* width = */ 0);
635     while (data->pos <= stop && iter.nxt_prop != -1) {
636       clic_utf8_graphscan_next(&iter, NULL, NULL);
637       data->pos++;
638     }
639     str = (const char*) iter.cnd;
640     if (from < str) {
641       clic__state_update_buffer(&data->buffer, &data->state);
642       clic__buffer_push_piece(&data->buffer, from, str);
643     }
644   }
645 
646   *end2 = oldend;
647 
648   /* If we are done, then just close all open tags */
649   if (data->pos > stop) {
650     memset(&data->state.new, 0, sizeof(struct cli_sgr_state));
651     clic__state_update_buffer(&data->buffer, &data->state);
652     return 1;
653   } else {
654     return 0;
655   }
656 }
657 
/* End callback of clic_ansi_substr(): close every attribute that is
 * still open, then store the result element: NA for NA input, otherwise
 * a new UTF-8 CHARSXP created from the buffer. */
static int substr_cb_end(SEXP rstr,
                         const char *str,
                         void *vdata) {
  struct substr_data *sd = (struct substr_data *) vdata;

  /* Requesting the all-off state emits the closing tags */
  memset(&sd->state.new, 0, sizeof(struct cli_sgr_state));
  clic__state_update_buffer(&sd->buffer, &sd->state);

  SEXP elt = rstr;
  if (rstr != NA_STRING) {
    elt = Rf_mkCharLenCE(
      clic__buffer_get(&sd->buffer),
      clic__buffer_size(&sd->buffer),
      CE_UTF8
    );
  }
  SET_STRING_ELT(sd->result, sd->done, elt);

  sd->done += 1;
  return 0;
}
681 
/* Extract substrings from the character vector `sx`, keeping the ANSI
 * styling of the extracted text.
 *
 * `start` and `stop` are integer vectors, and they are indexed the same
 * way as `sx`, so they must be at least as long. Returns a new character
 * vector of the same length as `sx`. Its class is the class of `sx`,
 * with "ansi_string" added to the front and "character" added to the
 * end, unless `sx` already inherits from them. */
SEXP clic_ansi_substr(SEXP sx, SEXP start, SEXP stop) {
  struct substr_data data;
  memset(&data.state, 0, sizeof(data.state));
  clic__buffer_init(&data.buffer);
  data.done = 0;
  data.result = PROTECT(allocVector(STRSXP, XLENGTH(sx)));
  data.start = INTEGER(start);
  data.stop = INTEGER(stop);

  /* Sequences that are not SGR tags are dropped: no csi callback */
  clic__ansi_iterator(sx, substr_cb_start, substr_cb_sgr, NULL,
                      substr_cb_text, substr_cb_end, &data);

  clic__buffer_free(&data.buffer);

  /* Build the class attribute of the result */
  SEXP ocls = PROTECT(getAttrib(sx, R_ClassSymbol));
  int oclslen = isNull(ocls) ? 0 : LENGTH(ocls);
  int has_as = 0, has_ch = 0;
  if (oclslen != 0) {
    has_as = Rf_inherits(sx, "ansi_string");
    has_ch = Rf_inherits(sx, "character");
  }

  int clslen = oclslen + !has_as + !has_ch;
  SEXP cls = PROTECT(allocVector(STRSXP, clslen));
  int pos = 0;
  if (!has_as) SET_STRING_ELT(cls, pos++, mkChar("ansi_string"));
  for (int k = 0; k < oclslen; k++) {
    SET_STRING_ELT(cls, pos++, STRING_ELT(ocls, k));
  }
  if (!has_ch) SET_STRING_ELT(cls, pos++, mkChar("character"));

  setAttrib(data.result, R_ClassSymbol, cls);
  UNPROTECT(3);
  return data.result;
}
718 
719 /* ---------------------------------------------------------------------- */
720 
/* Data shared by the callbacks of clic_ansi_html(). */
struct html_data {
  struct cli_ansi_state state;  /* SGR state tracked while scanning */
  struct cli_buffer buffer;     /* the HTML output being built */
  const char *str;              /* NOTE(review): not used by the code
                                   visible in this part of the file */
  R_xlen_t done;                /* index of the result element to fill next */
  SEXP result;                  /* output character vector */
  char had_tags;                /* whether clic__html_start() opened a
                                   <span> that clic__html_end() must close */
  char keep_csi;                /* whether to copy non-SGR sequences */
};
730 
731 #define EMITS1(s) do {                            \
732   if (first) {                                    \
733     EMITS("<span class=\"ansi");                  \
734     first = 0;                                    \
735   }                                               \
736   EMITS(s); } while (0)
737 
738 
clic__html_start(struct html_data * data)739 static void clic__html_start(struct html_data *data) {
740 
741   struct cli_buffer *buffer = &data->buffer;
742   struct cli_ansi_state *state = &data->state;
743 
744   char col[64];
745 
746   int first = 1;
747  /* Opening tags ------------------------------------------------------ */
748 
749   if (state->new.bold > state->old.bold) {
750     EMITS1(" ansi-bold");
751   }
752 
753   if (state->new.faint > state->old.faint) {
754     EMITS1(" ansi-faint");
755   }
756 
757   if (state->new.italic > state->old.italic) {
758     EMITS1(" ansi-italic");
759   }
760 
761   if (state->new.underline > state->old.underline) {
762     EMITS1(" ansi-underline");
763   }
764 
765   if (state->new.blink > state->old.blink) {
766     EMITS1(" ansi-blink");
767   }
768 
769   if (state->new.inverse > state->old.inverse) {
770     EMITS1(" ansi-inverse");
771   }
772 
773   if (state->new.hide > state->old.hide) {
774     EMITS1(" ansi-hide");
775   }
776 
777   if (state->new.crossedout > state->old.crossedout) {
778     EMITS1(" ansi-crossedout");
779   }
780 
781   if (state->new.fg.col != 0 &&
782       DIFFERENT_COLOR(state->new.fg, state->old.fg)) {
783     if (state->new.fg.col == CLI_COL_256) {
784       snprintf(col, sizeof(col), " ansi-color-%u", state->new.fg.r);
785     } else if (state->new.fg.col == CLI_COL_RGB) {
786       snprintf(col, sizeof(col), " ansi-color-%u-%u-%u",
787                state->new.fg.r, state->new.fg.g, state->new.fg.b);
788     } else {
789       unsigned char ncol = state->new.fg.col - 30;
790       if (ncol > 7) ncol -= 60;
791       snprintf(col, sizeof(col), " ansi-color-%u", ncol);
792     }
793     EMITS1(col);
794   }
795 
796   if (state->new.bg.col != 0 &&
797       DIFFERENT_COLOR(state->new.bg, state->old.bg)) {
798     if (state->new.bg.col == CLI_COL_256) {
799       snprintf(col, sizeof(col), " ansi-bg-color-%u",
800                state->new.bg.r);
801     } else if (state->new.bg.col == CLI_COL_RGB) {
802       snprintf(col, sizeof(col), " ansi-bg-color-%u-%u-%u",
803                state->new.bg.r, state->new.bg.g, state->new.bg.b);
804     } else {
805       unsigned char ncol = state->new.bg.col - 40;
806       if (ncol > 7) ncol -= 60;
807       snprintf(col, sizeof(col), " ansi-bg-color-%u", ncol);
808     }
809     EMITS1(col);
810   }
811 
812   state->old = state->new;
813 
814   if (!first) EMITS("\">");
815   data->had_tags = !first;
816 }
817 
clic__html_end(struct html_data * data)818 static void clic__html_end(struct html_data *data) {
819 
820   struct cli_buffer *buffer = &data->buffer;
821   if (data->had_tags) EMITS("</span>");
822 }
823 
/* Start callback of clic_ansi_html(): empty the output buffer. Returns
 * non-zero for NA, to skip the scan. */
static int html_cb_start(SEXP rstr, const char *str, void *vdata) {
  struct html_data *hd = (struct html_data *) vdata;
  int is_na = rstr == NA_STRING;
  clic__buffer_reset(&hd->buffer);
  return is_na;
}
829 
html_cb_sgr(const char * param,const char * intermed,const char * end,void * vdata)830 static int html_cb_sgr(const char *param,
831                        const char *intermed,
832                        const char *end,
833                        void *vdata) {
834   struct html_data *data = vdata;
835   clic__ansi_update_state(param, intermed, end, &data->buffer, &data->state);
836   return 0;
837 }
838 
html_cb_csi(const char * param,const char * intermed,const char * end,void * vdata)839 static int html_cb_csi(const char *param,
840                        const char *intermed,
841                        const char *end,
842                        void *vdata) {
843   struct html_data *data = vdata;
844   if (data->keep_csi) {
845     clic__buffer_push_piece(&data->buffer, param - 2, end + 1);
846   }
847   return 0;
848 }
849 
html_cb_text(const char * str,const char * end,void * vdata)850 static int html_cb_text(const char *str,
851                         const char *end,
852                         void *vdata) {
853   struct html_data *data = vdata;
854   clic__html_start(data);
855   clic__buffer_push_piece(&data->buffer, str, end);
856   clic__html_end(data);
857   return 0;
858 }
859 
/* End callback of clic_ansi_html(): switch off all requested attributes
 * for the next string, then store the result element: NA for NA input,
 * otherwise a new UTF-8 CHARSXP created from the buffer. */
static int html_cb_end(SEXP rstr,
                       const char *str,
                       void *vdata) {
  struct html_data *hd = (struct html_data *) vdata;
  memset(&hd->state.new, 0, sizeof(hd->state.new));

  SEXP elt = rstr;
  if (rstr != NA_STRING) {
    elt = Rf_mkCharLenCE(
      clic__buffer_get(&hd->buffer),
      clic__buffer_size(&hd->buffer),
      CE_UTF8
    );
  }
  SET_STRING_ELT(hd->result, hd->done, elt);

  hd->done += 1;
  return 0;
}
882 
/* Convert the ANSI styled strings of the character vector `sx` to HTML,
 * with <span> elements that have "ansi-*" classes.
 *
 * `keep_csi` is a logical scalar: whether to keep the control sequences
 * that are not SGR tags. Returns a new character vector of the same
 * length, without attributes. */
SEXP clic_ansi_html(SEXP sx, SEXP keep_csi) {
  struct html_data data;
  memset(&data.state, 0, sizeof(data.state));
  clic__buffer_init(&data.buffer);
  data.done = 0;
  data.result = PROTECT(allocVector(STRSXP, XLENGTH(sx)));
  data.keep_csi = LOGICAL(keep_csi)[0];

  clic__ansi_iterator(sx, html_cb_start, html_cb_sgr, html_cb_csi,
                      html_cb_text, html_cb_end, &data);

  clic__buffer_free(&data.buffer);

  UNPROTECT(1);
  return data.result;
}
906 
907 /* ---------------------------------------------------------------------- */
908 
/* Data shared by the callbacks of clic_ansi_has_any(). */
struct has_any_data {
  R_xlen_t done;  /* index of the result element to fill next */
  SEXP result;    /* output logical vector */
  char sgr;       /* whether SGR tags count as a match */
  char csi;       /* whether other control sequences count as a match */
  char has;       /* whether the current string had a match; reset to 0
                     after each string */
};
916 
has_any_cb_sgr(const char * param,const char * intermed,const char * end,void * vdata)917 static int has_any_cb_sgr(const char *param,
918                           const char *intermed,
919                           const char *end,
920                           void *vdata) {
921   struct has_any_data *data = vdata;
922   if (data->sgr) {
923     data->has = 1;
924     return 1;
925   } else {
926     return 0;
927   }
928 }
929 
has_any_cb_csi(const char * param,const char * intermed,const char * end,void * vdata)930 static int has_any_cb_csi(const char *param,
931                           const char *intermed,
932                           const char *end,
933                           void *vdata) {
934   struct has_any_data *data = vdata;
935   if (data->csi) {
936     data->has = 1;
937     return 1;
938   } else {
939     return 0;
940   }
941 }
942 
943 
/* End callback of clic_ansi_has_any(): store the answer for the current
 * string (NA for NA input), and clear the match flag for the next one. */
static int has_any_cb_end(SEXP rstr,
                          const char *str,
                          void *vdata) {
  struct has_any_data *hd = (struct has_any_data *) vdata;
  int *out = LOGICAL(hd->result);
  out[hd->done] = rstr == NA_STRING ? NA_LOGICAL : hd->has;
  hd->done += 1;
  hd->has = 0;
  return 0;
}
957 
/* Check which strings of the character vector `sx` contain ANSI control
 * sequences.
 *
 * `sgr` and `csi` are logical scalars: whether to look for SGR tags and
 * for other control sequences, respectively. Returns a logical vector of
 * the same length as `sx`, NA for NA input. */
SEXP clic_ansi_has_any(SEXP sx, SEXP sgr, SEXP csi) {
  struct has_any_data data;
  data.has = 0;
  data.done = 0;
  data.result = PROTECT(allocVector(LGLSXP, XLENGTH(sx)));
  data.sgr = LOGICAL(sgr)[0];
  data.csi = LOGICAL(csi)[0];

  clic__ansi_iterator(sx,
                      /* cb_start = */ 0,
                      has_any_cb_sgr,
                      has_any_cb_csi,
                      /* cb_text = */ 0,
                      has_any_cb_end,
                      &data);

  UNPROTECT(1);
  return data.result;
}
979 
980 /* ---------------------------------------------------------------------- */
981 
982 struct strip_data {
983   struct cli_buffer buffer;
984   R_xlen_t done;
985   size_t num_tags;
986   SEXP result;
987   char sgr;
988   char csi;
989 };
990 
/*
 * Start-of-element callback used by clic_ansi_strip().
 *
 * Rewinds the scratch buffer and zeroes the dropped-sequence counter so
 * that nothing carries over from the previous element. Always returns 0.
 */
static int strip_cb_start(SEXP rstr, const char *str, void *vdata) {
  struct strip_data *state = vdata;

  clic__buffer_reset(&state->buffer);
  state->num_tags = 0;

  return 0;
}
997 
strip_cb_sgr(const char * param,const char * intermed,const char * end,void * vdata)998 static int strip_cb_sgr(const char *param,
999                         const char *intermed,
1000                         const char *end,
1001                         void *vdata) {
1002   struct strip_data *data = vdata;
1003   if (data->sgr) {
1004     data->num_tags ++;
1005   } else {
1006     clic__buffer_push_piece(&data->buffer, param - 2, end + 1);
1007   }
1008   return 0;
1009 }
1010 
strip_cb_csi(const char * param,const char * intermed,const char * end,void * vdata)1011 static int strip_cb_csi(const char *param,
1012                         const char *intermed,
1013                         const char *end,
1014                         void *vdata) {
1015   struct strip_data *data = vdata;
1016   if (data->csi) {
1017     data->num_tags ++;
1018   } else {
1019     clic__buffer_push_piece(&data->buffer, param - 2, end + 1);
1020   }
1021   return 0;
1022 }
1023 
strip_cb_text(const char * str,const char * end,void * vdata)1024 static int strip_cb_text(const char *str,
1025                         const char *end,
1026                         void *vdata) {
1027   struct strip_data *data = vdata;
1028   clic__buffer_push_piece(&data->buffer, str, end);
1029   return 0;
1030 }
1031 
/*
 * End-of-element callback used by clic_ansi_strip().
 *
 * If no sequence was dropped from the element, the original CHARSXP is
 * stored in the result unchanged. Otherwise a new UTF-8 string is created
 * from the contents of the scratch buffer. Then the element counter is
 * advanced. Always returns 0.
 */
static int strip_cb_end(SEXP rstr,
                        const char *str,
                        void *vdata) {
  struct strip_data *state = vdata;
  SEXP element = rstr;

  if (state->num_tags != 0) {
    /* Stored into the protected result right away, before any other
       allocation can happen. */
    element = Rf_mkCharLenCE(
      clic__buffer_get(&state->buffer),
      clic__buffer_size(&state->buffer),
      CE_UTF8
    );
  }

  SET_STRING_ELT(state->result, state->done, element);
  state->done++;
  return 0;
}
1054 
1055 /* TODO: this would benefit from a non-iterator implementation, that
1056    would be much faster. */
1057 
1058 /* TODO: strip hyperlinks */
1059 
/*
 * Build a character vector, one string per element of `sx`, in which the
 * selected kinds of ANSI sequences have been dropped.
 *
 * sx   Input vector handed to clic__ansi_iterator().
 * sgr  Logical; its first element makes the SGR callback drop sequences.
 * csi  Logical; its first element makes the CSI callback drop sequences.
 *
 * Returns a character vector of length XLENGTH(sx). The elements are
 * filled in by strip_cb_end().
 */
SEXP clic_ansi_strip(SEXP sx, SEXP sgr, SEXP csi) {
  struct strip_data state;

  clic__buffer_init(&state.buffer);
  /* Keep the result protected while the iterator runs. */
  state.result = PROTECT(allocVector(STRSXP, XLENGTH(sx)));
  state.done = 0;
  state.sgr = LOGICAL(sgr)[0];
  state.csi = LOGICAL(csi)[0];

  clic__ansi_iterator(
    sx,
    /* cb_start = */ strip_cb_start,
    /* cb_sgr   = */ strip_cb_sgr,
    /* cb_csi   = */ strip_cb_csi,
    /* cb_text  = */ strip_cb_text,
    /* cb_end   = */ strip_cb_end,
    &state
  );

  clic__buffer_free(&state.buffer);

  UNPROTECT(1);
  return state.result;
}
1083 
1084 /* ---------------------------------------------------------------------- */
1085 
1086 struct nchar_data {
1087   R_xlen_t done;
1088   int *resptr;
1089   int *result;
1090 };
1091 
/*
 * Start-of-element callback used by clic_ansi_nchar().
 *
 * Points `resptr` at the result slot of the current element and
 * initialises it: NA_INTEGER for NA_STRING (returning 1), 0 for everything
 * else (returning 0).
 *
 * NOTE(review): the non-zero return presumably makes clic__ansi_iterator()
 * skip the text callbacks for the NA element -- confirm.
 */
static int nchar_cb_start(SEXP rstr, const char *str, void *vdata) {
  struct nchar_data *state = vdata;
  const int is_na = (rstr == NA_STRING);

  state->resptr = state->result + state->done;
  *state->resptr = is_na ? NA_INTEGER : 0;

  return is_na;
}
1103 
nchar_cb_text_graphemes(const char * str,const char * end,void * vdata)1104 static int nchar_cb_text_graphemes(const char *str,
1105                                    const char *end,
1106                                    void *vdata) {
1107   struct nchar_data *data = vdata;
1108   char *end2 = (char*) end;
1109   char oldend = *end2;
1110   int len = 0;
1111   struct grapheme_iterator iter;
1112 
1113   *end2 = '\0';
1114   clic_utf8_graphscan_make(&iter, (const uint8_t*) str, /* width = */ 0);
1115   while (iter.nxt_prop != -1) {
1116     clic_utf8_graphscan_next(&iter, NULL, NULL);
1117     len ++;
1118   }
1119   *data->resptr += len;
1120 
1121   *end2 = oldend;
1122   return 0;
1123 }
1124 
nchar_cb_text_bytes(const char * str,const char * end,void * vdata)1125 static int nchar_cb_text_bytes(const char *str,
1126                                const char *end,
1127                                void *vdata) {
1128   struct nchar_data *data = vdata;
1129   *data->resptr += (end - str);
1130   return 0;
1131 }
1132 
nchar_cb_text_width(const char * str,const char * end,void * vdata)1133 static int nchar_cb_text_width(const char *str,
1134                                const char *end,
1135                                void *vdata) {
1136   struct nchar_data *data = vdata;
1137   char *end2 = (char*) end;
1138   char oldend = *end2;
1139   int len = 0, width;
1140   struct grapheme_iterator iter;
1141 
1142   *end2 = '\0';
1143   clic_utf8_graphscan_make(&iter, (const uint8_t*) str, /* width = */ 1);
1144   while (iter.nxt_prop != -1) {
1145     clic_utf8_graphscan_next(&iter, NULL, &width);
1146     len += width;
1147   }
1148   *data->resptr += len;
1149 
1150   *end2 = oldend;
1151   return 0;
1152 }
1153 
nchar_cb_text_codepoints(const char * str,const char * end,void * vdata)1154 static int nchar_cb_text_codepoints(const char *str,
1155                                     const char *end,
1156                                     void *vdata) {
1157   struct nchar_data *data = vdata;
1158 
1159   while (str < end) {
1160     int len = UTF8LITE_UTF8_TOTAL_LEN(*str);
1161     str += len;
1162     *data->resptr += 1;
1163   }
1164 
1165   return 0;
1166 }
1167 
/*
 * End-of-element callback used by clic_ansi_nchar(): moves on to the next
 * result slot. Always returns 0.
 */
static int nchar_cb_end(SEXP rstr,
                        const char *str,
                        void *vdata) {
  struct nchar_data *state = vdata;

  state->done += 1;
  return 0;
}
1175 
1176   static clic__text_callback_t nchar_text_cbs[] = {
1177     nchar_cb_text_graphemes,
1178     nchar_cb_text_bytes,
1179     nchar_cb_text_width,
1180     nchar_cb_text_codepoints
1181   };
1182 
1183   /* TODO: this would benefit from a non-iterator implementation, that
1184      would be much faster. */
1185 
/*
 * Count the size of every element of `sx`, using the counting method
 * selected by `type`.
 *
 * sx    Input vector handed to clic__ansi_iterator().
 * type  Integer; its first element is a 1-based index into
 *       nchar_text_cbs: 1 = graphemes, 2 = bytes, 3 = width,
 *       4 = codepoints.
 *
 * Returns an integer vector of length XLENGTH(sx). No SGR or CSI callbacks
 * are registered, so only the pieces that the iterator reports as text
 * contribute to the counts. NA_STRING elements yield NA_INTEGER (see
 * nchar_cb_start()).
 *
 * Raises an R error if `type` is empty, NA, or not a valid index.
 */
SEXP clic_ansi_nchar(SEXP sx, SEXP type) {
  const int num_types =
    (int) (sizeof(nchar_text_cbs) / sizeof(nchar_text_cbs[0]));

  /* Validate before allocating anything, so that the error path has
     nothing to unprotect. */
  if (XLENGTH(type) < 1) {
    Rf_error("Invalid `type` in `clic_ansi_nchar()`: it must not be empty");
  }

  /* Check the raw value before subtracting: NA_INTEGER is INT_MIN, so
     `type - 1` would be a signed overflow, and any other value outside
     1..num_types would index past the callback table. */
  int rtype = INTEGER(type)[0];
  if (rtype < 1 || rtype > num_types) {
    Rf_error(
      "Invalid `type` in `clic_ansi_nchar()`: it must be between 1 and %d",
      num_types
    );
  }
  int ctype = rtype - 1;

  struct nchar_data data;
  data.done = 0;
  /* Keep the result protected while the iterator runs. */
  SEXP result = PROTECT(allocVector(INTSXP, XLENGTH(sx)));
  data.result = INTEGER(result);

  clic__ansi_iterator(
    sx,
    nchar_cb_start,
    /* sgr   = */ NULL,
    /* csi   = */ NULL,
    nchar_text_cbs[ctype],
    nchar_cb_end,
    &data
  );

  UNPROTECT(1);
  return result;
}
1206