1 /*
2  * Copyright (C) 2021  Brodie Gaslam
3  *
4  * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * Go to <https://www.r-project.org/Licenses/GPL-2> for a copy of the license.
17  */
18 
19 #include "fansi.h"
20 
21 /*
22  * Determine how many spaces tab width should be
23  *
24  * state should be at a tab
25  */
FANSI_tab_width(struct FANSI_state state,SEXP tab_stops)26 int FANSI_tab_width(struct FANSI_state state, SEXP tab_stops) {
27   R_xlen_t stops = XLENGTH(tab_stops);
28   if(!stops)
29     error("Internal Error: must have at least one tab stop");  // nocov
30   if(*(state.string + state.pos_byte) != '\t')
31     error("Internal Error: computing tab width on not a tab"); // nocov
32 
33   int tab_width = 0;
34   R_xlen_t stop_idx = 0;
35 
36   while(state.pos_width >= tab_width) {
37     int stop_size = INTEGER(tab_stops)[stop_idx];
38     if(stop_size < 1)
39       error("Internal Error: stop size less than 1.");  // nocov
40     if(tab_width > FANSI_int_max - stop_size)
41       error("Integer overflow when attempting to compute tab width."); // nocov
42     tab_width += stop_size;
43     if(stop_idx < stops - 1) stop_idx++;
44   }
45   return tab_width - state.pos_width;
46 }
47 
/*
 * Replace every tab in the elements of a character vector with the number
 * of spaces required to reach the next tab stop.
 *
 * @param vec character vector (STRSXP); anything else is an error.  NA
 *   elements and elements that contain no tab are left as they are.
 * @param tab_stops integer vector of tab stop sizes, interpreted as in
 *   FANSI_tab_width.
 * @param buff scratch buffer, sized with FANSI_size_buff for each element
 *   that needs rewriting.
 * @param warn, term_cap, ctl passed through to FANSI_state_init_full.
 * @return `vec` itself when no element contains a tab, otherwise a duplicate
 *   of `vec` in which the tab-containing elements have been replaced.
 */
SEXP FANSI_tabs_as_spaces(
  SEXP vec, SEXP tab_stops, struct FANSI_buff * buff,  SEXP warn,
  SEXP term_cap, SEXP ctl
) {
  if(TYPEOF(vec) != STRSXP)
    error("Argument 'vec' should be a character vector"); // nocov
  R_xlen_t len = XLENGTH(vec);
  R_xlen_t len_stops = XLENGTH(tab_stops);

  const char * source;
  int tabs_in_str = 0;
  int max_tab_stop = 1;

  SEXP res_sxp = vec;

  PROTECT_INDEX ipx;
  PROTECT_WITH_INDEX(res_sxp, &ipx);  // reserve spot if we need to alloc later

  for(R_xlen_t i = 0; i < len; ++i) {
    FANSI_interrupt(i);
    int tab_count = 0;

    SEXP chr = STRING_ELT(vec, i);
    if(chr == NA_STRING) continue;
    FANSI_check_chrsxp(chr, i);

    source = CHAR(chr);

    // Count the tabs in this element
    while(*source && (source = strchr(source, '\t'))) {
      if(!tabs_in_str) {
        // First tab seen in the whole vector: only now do we need a copy of
        // the input to write into, and the largest tab stop for sizing.
        tabs_in_str = 1;
        REPROTECT(res_sxp = duplicate(vec), ipx);
        for(R_xlen_t j = 0; j < len_stops; ++j) {
          if(INTEGER(tab_stops)[j] > max_tab_stop)
            max_tab_stop = INTEGER(tab_stops)[j];
        }
      }
      ++tab_count;
      ++source;
    }
    if(tab_count) {
      // NOTE(review): no encoding conversion is performed here; presumably
      // FANSI_check_chrsxp above guarantees an encoding the width
      // calculations can handle -- confirm.

      const char * string = CHAR(chr);

      // Figure out possible size of buffer, allowing max_tab_stop for every
      // tab, which should over-allocate.  Each tab already occupies one byte,
      // hence the `- 1`.

      size_t new_buff_size = LENGTH(chr);
      int tab_extra = max_tab_stop - 1;

      for(int k = 0; k < tab_count; ++k) {
        if(new_buff_size > (size_t) (FANSI_int_max - tab_extra))
          error(
            "%s%s",
            "Converting tabs to spaces will cause string to be longer than ",
            "allowed INT_MAX."
          );
        new_buff_size += tab_extra;
      }
      ++new_buff_size;   // Room for NULL

      FANSI_size_buff(buff, new_buff_size);

      SEXP R_true = PROTECT(ScalarLogical(1));
      SEXP R_one = PROTECT(ScalarInteger(1));
      struct FANSI_state state = FANSI_state_init_full(
        string, warn, term_cap, R_true, R_true, R_one, ctl
      );
      UNPROTECT(2);

      char cur_chr;

      char * buff_track, * buff_start;
      buff_track = buff_start = buff->buff;

      // Start of the pending run of bytes not yet copied to the buffer
      int last_byte = state.pos_byte;
      int warn_old = state.warn;

      while(1) {
        cur_chr = state.string[state.pos_byte];
        int extra_spaces = 0;

        if(cur_chr == '\t') {
          extra_spaces = FANSI_tab_width(state, tab_stops);
        } else if (cur_chr == '\n') {
          state = FANSI_reset_width(state);
        }
        // Write string

        if(cur_chr == '\t' || !cur_chr) {
          // Flush everything accumulated since the previous tab
          int write_bytes = state.pos_byte - last_byte;
          memcpy(buff_track, state.string + last_byte, write_bytes);
          buff_track += write_bytes;

          // consume tab and advance; warnings are suppressed for this read
          // and restored right after

          state.warn = 0;
          state = FANSI_read_next(state);
          state.warn = warn_old;
          cur_chr = state.string[state.pos_byte];
          state = FANSI_inc_width(state, extra_spaces);
          last_byte = state.pos_byte;

          // actually write the extra spaces

          while(extra_spaces) {
            --extra_spaces;
            *buff_track = ' ';
            ++buff_track;
          }
          if(!cur_chr) {
            *buff_track = 0;
            break;
          }
          // Do not read ahead here: the character following the tab must be
          // examined at the top of the loop, as it may itself be a tab that
          // needs expanding or a newline that resets the width.  Reading it
          // unconditionally left consecutive tabs unexpanded.
          continue;
        }
        state = FANSI_read_next(state);
      }
      // Write the CHARSXP

      cetype_t chr_type = CE_NATIVE;
      if(state.has_utf8) chr_type = CE_UTF8;
      FANSI_check_chr_size(buff_start, buff_track, i);
      SEXP chr_sxp = PROTECT(
        mkCharLenCE(buff_start, (int) (buff_track - buff_start), chr_type)
      );
      SET_STRING_ELT(res_sxp, i, chr_sxp);
      UNPROTECT(1);
    }
  }
  UNPROTECT(1);
  return res_sxp;
}
/*
 * Convenience wrapper around FANSI_tabs_as_spaces that supplies its own
 * scratch buffer (initialised with a length of zero) instead of requiring
 * the caller to provide one.  All other arguments are forwarded unchanged
 * and the result is returned as is.
 */
SEXP FANSI_tabs_as_spaces_ext(
  SEXP vec, SEXP tab_stops, SEXP warn, SEXP term_cap, SEXP ctl
) {
  struct FANSI_buff scratch = {.len = 0};
  SEXP res = FANSI_tabs_as_spaces(
    vec, tab_stops, &scratch, warn, term_cap, ctl
  );
  return res;
}
186 
187