1 /*
2 * Copyright (C) 2021 Brodie Gaslam
3 *
4 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * Go to <https://www.r-project.org/Licenses/GPL-2> for a copy of the license.
17 */
18
19 #include "fansi.h"
20
21 /*
22 * Determine how many spaces tab width should be
23 *
24 * state should be at a tab
25 */
FANSI_tab_width(struct FANSI_state state,SEXP tab_stops)26 int FANSI_tab_width(struct FANSI_state state, SEXP tab_stops) {
27 R_xlen_t stops = XLENGTH(tab_stops);
28 if(!stops)
29 error("Internal Error: must have at least one tab stop"); // nocov
30 if(*(state.string + state.pos_byte) != '\t')
31 error("Internal Error: computing tab width on not a tab"); // nocov
32
33 int tab_width = 0;
34 R_xlen_t stop_idx = 0;
35
36 while(state.pos_width >= tab_width) {
37 int stop_size = INTEGER(tab_stops)[stop_idx];
38 if(stop_size < 1)
39 error("Internal Error: stop size less than 1."); // nocov
40 if(tab_width > FANSI_int_max - stop_size)
41 error("Integer overflow when attempting to compute tab width."); // nocov
42 tab_width += stop_size;
43 if(stop_idx < stops - 1) stop_idx++;
44 }
45 return tab_width - state.pos_width;
46 }
47
/*
 * Replace every tab in the elements of a character vector with the number of
 * spaces given by FANSI_tab_width().
 *
 * vec        character vector (STRSXP) to process; NA elements are skipped.
 * tab_stops  stop sizes, read with INTEGER(); see FANSI_tab_width().
 * buff       scratch buffer, sized through FANSI_size_buff() as needed.
 * warn, term_cap, ctl  forwarded unchanged to FANSI_state_init_full().
 *
 * Returns `vec` itself when no element contains a tab, otherwise a duplicate
 * of `vec` in which every element that contains tabs has been rewritten.
 *
 * Errors if `vec` is not a character vector, or if the worst case size of an
 * expanded element would exceed FANSI_int_max.
 */
SEXP FANSI_tabs_as_spaces(
  SEXP vec, SEXP tab_stops, struct FANSI_buff * buff, SEXP warn,
  SEXP term_cap, SEXP ctl
) {
  if(TYPEOF(vec) != STRSXP)
    error("Argument 'vec' should be a character vector"); // nocov
  R_xlen_t len = XLENGTH(vec);
  R_xlen_t len_stops = XLENGTH(tab_stops);

  const char * source;
  int tabs_in_str = 0;   // set once any element of `vec` has a tab
  int max_tab_stop = 1;

  SEXP res_sxp = vec;

  PROTECT_INDEX ipx;
  PROTECT_WITH_INDEX(res_sxp, &ipx);  // reserve spot if we need to alloc later

  for(R_xlen_t i = 0; i < len; ++i) {
    FANSI_interrupt(i);
    int tab_count = 0;

    SEXP chr = STRING_ELT(vec, i);
    if(chr == NA_STRING) continue;
    FANSI_check_chrsxp(chr, i);

    source = CHAR(chr);

    // Count the tabs in this element.  The first tab seen anywhere in `vec`
    // triggers the one-off setup: duplicate the input so it can be modified,
    // and find the largest stop size for the buffer size estimate.
    while(*source && (source = strchr(source, '\t'))) {
      if(!tabs_in_str) {
        tabs_in_str = 1;
        REPROTECT(res_sxp = duplicate(vec), ipx);
        for(R_xlen_t j = 0; j < len_stops; ++j) {
          if(INTEGER(tab_stops)[j] > max_tab_stop)
            max_tab_stop = INTEGER(tab_stops)[j];
        }
      }
      ++tab_count;
      ++source;
    }
    if(tab_count) {
      const char * string = CHAR(chr);

      // Figure out possible size of buffer, allowing max_tab_stop for every
      // tab, which should over-allocate.  Each tab already accounts for one
      // byte of the original length, hence the `- 1`.

      size_t new_buff_size = LENGTH(chr);
      int tab_extra = max_tab_stop - 1;

      for(int k = 0; k < tab_count; ++k) {
        if(new_buff_size > (size_t) (FANSI_int_max - tab_extra))
          error(
            "%s%s",
            "Converting tabs to spaces will cause string to be longer than ",
            "allowed INT_MAX."
          );
        new_buff_size += tab_extra;
      }
      ++new_buff_size;   // Room for NULL

      FANSI_size_buff(buff, new_buff_size);

      SEXP R_true = PROTECT(ScalarLogical(1));
      SEXP R_one = PROTECT(ScalarInteger(1));
      struct FANSI_state state = FANSI_state_init_full(
        string, warn, term_cap, R_true, R_true, R_one, ctl
      );
      UNPROTECT(2);

      char cur_chr;

      char * buff_track, * buff_start;
      buff_track = buff_start = buff->buff;

      int last_byte = state.pos_byte;  // first input byte not yet copied out
      int warn_old = state.warn;

      while(1) {
        cur_chr = state.string[state.pos_byte];
        int extra_spaces = 0;

        if(cur_chr == '\t') {
          extra_spaces = FANSI_tab_width(state, tab_stops);
        } else if (cur_chr == '\n') {
          state = FANSI_reset_width(state);
        }
        if(cur_chr == '\t' || !cur_chr) {
          // Flush everything read since the previous tab (or the start)

          int write_bytes = state.pos_byte - last_byte;
          memcpy(buff_track, state.string + last_byte, write_bytes);
          buff_track += write_bytes;

          // Consume the tab with warnings turned off for that one read, then
          // account for the width of the spaces that replace it

          state.warn = 0;
          state = FANSI_read_next(state);
          state.warn = warn_old;
          cur_chr = state.string[state.pos_byte];
          state = FANSI_inc_width(state, extra_spaces);
          last_byte = state.pos_byte;

          // actually write the extra spaces

          while(extra_spaces) {
            --extra_spaces;
            *buff_track = ' ';
            ++buff_track;
          }
          if(!cur_chr) {
            *buff_track = 0;
            break;
          }
          // The byte after the tab has not been examined yet.  Go back to
          // the top so that a tab or newline directly following a tab is
          // processed like any other, instead of being skipped over by the
          // read below (which left consecutive tabs unexpanded).
          continue;
        }
        state = FANSI_read_next(state);
      }
      // Write the CHARSXP

      cetype_t chr_type = CE_NATIVE;
      if(state.has_utf8) chr_type = CE_UTF8;
      FANSI_check_chr_size(buff_start, buff_track, i);
      SEXP chr_sxp = PROTECT(
        mkCharLenCE(buff_start, (int) (buff_track - buff_start), chr_type)
      );
      SET_STRING_ELT(res_sxp, i, chr_sxp);
      UNPROTECT(1);
    }
  }
  UNPROTECT(1);
  return res_sxp;
}
/*
 * Variant of FANSI_tabs_as_spaces() that does not take a buffer argument: it
 * sets up a local buffer struct with `len` of zero and forwards every other
 * argument unchanged.
 */
SEXP FANSI_tabs_as_spaces_ext(
  SEXP vec, SEXP tab_stops, SEXP warn, SEXP term_cap, SEXP ctl
) {
  struct FANSI_buff scratch = {.len = 0};
  struct FANSI_buff * scratch_ptr = &scratch;

  SEXP expanded = FANSI_tabs_as_spaces(
    vec, tab_stops, scratch_ptr, warn, term_cap, ctl
  );
  return expanded;
}
186
187