1 /********************************************************************************
2 * *
3 * T a b - S t o p s M a n i p u l a t i o n s *
4 * *
5 *********************************************************************************
6 * Copyright (C) 1997,2021 by Jeroen van der Zijp. All Rights Reserved. *
7 *********************************************************************************
8 * This library is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU Lesser General Public License as published by *
10 * the Free Software Foundation; either version 3 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This library is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU Lesser General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU Lesser General Public License *
19 * along with this program. If not, see <http://www.gnu.org/licenses/> *
20 ********************************************************************************/
21 #include "xincs.h"
22 #include "fxver.h"
23 #include "fxdefs.h"
24 #include "fxmath.h"
25 #include "fxascii.h"
26 #include "fxunicode.h"
27 #include "FXArray.h"
28 #include "FXHash.h"
29 #include "FXStream.h"
30 #include "FXString.h"
31
32
33 /*
34 Notes:
35 - Expand tabs to spaces, or compress runs of spaces to tabs, given tab stops
36 and UTF8 encoding.
37 - Assume UTF8 characters account for 1 column.
38 - Complex shifting and tabbification of text.
39 */
40
41 using namespace FX;
42
43 /*******************************************************************************/
44
45 namespace FX {
46
47 // Expand tabs with the equivalent amount of spaces.
48 // UTF8 encoded characters are counted as one column.
detab(const FXString & text,FXint tabcols)49 FXString FXString::detab(const FXString& text,FXint tabcols){
50 FXString result;
51 FXint is,d,s;
52 FXuchar c;
53 is=d=s=0;
54 while(s<text.length()){
55 c=text[s++];
56 if(c=='\n'){
57 d++;
58 is=0;
59 continue;
60 }
61 if(c=='\t'){
62 do{ d++; }while(++is%tabcols);
63 continue;
64 }
65 d++;
66 is++;
67 if(c<0xC0) continue;
68 d++; s++;
69 if(c<0xE0) continue;
70 d++; s++;
71 if(c<0xF0) continue;
72 d++; s++;
73 }
74 result.length(d);
75 is=d=s=0;
76 while(s<text.length()){
77 c=text[s++];
78 if(c=='\n'){
79 result[d++]=c;
80 is=0;
81 continue;
82 }
83 if(c=='\t'){
84 do{ result[d++]=' '; }while(++is%tabcols);
85 continue;
86 }
87 result[d++]=c;
88 is++;
89 if(c<0xC0) continue;
90 result[d++]=text[s++];
91 if(c<0xE0) continue;
92 result[d++]=text[s++];
93 if(c<0xF0) continue;
94 result[d++]=text[s++];
95 }
96 FXASSERT(d==result.length());
97 return result;
98 }
99
100
101 // Compress runs of more than 2 spaces with tabs.
102 // UTF8 characters are counted as one column.
entab(const FXString & text,FXint tabcols)103 FXString FXString::entab(const FXString& text,FXint tabcols){
104 FXString result;
105 FXint is,ie,d,s,ts;
106 FXuchar c;
107 is=ie=d=s=0;
108 while(s<text.length()){
109 c=text[s];
110 s++;
111 d++;
112 ie++;
113 if(c==' '){ // Accumulate spaces
114 if((ie-is)<3) continue; // Run of less than 3
115 ts=is+tabcols-is%tabcols;
116 if(ie<ts) continue; // Not crossing tabstop
117 d+=1+is-ts; // Adjust
118 is=ts; // Advance to tabstop
119 continue;
120 }
121 if(c=='\t'){ // Keep the tab
122 d+=is-ie; // Adjust
123 d++;
124 ie--;
125 ie+=tabcols-ie%tabcols; // New tab-column
126 is+=tabcols-is%tabcols;
127 if(is==ie) continue; // Reached tabstop
128 is+=tabcols-is%tabcols;
129 d++;
130 continue;
131 }
132 if(c=='\n'){ // Reset columns
133 is=0;
134 ie=0;
135 continue;
136 }
137 is=ie; // One UTF8 character
138 if(c<0xC0) continue;
139 d++;
140 s++;
141 if(c<0xE0) continue;
142 d++;
143 s++;
144 if(c<0xF0) continue;
145 d++;
146 s++;
147 }
148 result.length(d);
149 is=ie=d=s=0;
150 while(s<text.length()){
151 c=result[d]=text[s];
152 s++;
153 d++;
154 ie++;
155 if(c==' '){ // Accumulate spaces
156 if((ie-is)<3) continue; // Run of less than 3
157 ts=is+tabcols-is%tabcols;
158 if(ie<ts) continue; // Not crossing tabstop
159 result[d+is-ie]='\t'; // Write a tab at start of run
160 d+=1+is-ts; // Adjust
161 is=ts; // Advance to tabstop
162 continue;
163 }
164 if(c=='\t'){ // Keep the tab
165 d+=is-ie; // Adjust
166 result[d++]='\t';
167 ie--;
168 ie+=tabcols-ie%tabcols;
169 is+=tabcols-is%tabcols;
170 if(is==ie) continue; // Reached tabstop
171 is+=tabcols-is%tabcols;
172 result[d++]='\t';
173 continue;
174 }
175 if(c=='\n'){ // Reset columns
176 is=0;
177 ie=0;
178 continue;
179 }
180 is=ie; // One UTF8 character
181 if(c<0xC0) continue;
182 result[d]=text[s];
183 d++;
184 s++;
185 if(c<0xE0) continue;
186 result[d]=text[s];
187 d++;
188 s++;
189 if(c<0xF0) continue;
190 result[d]=text[s];
191 d++;
192 s++;
193 }
194 FXASSERT(d<=result.length());
195 return result;
196 }
197
198 // Retabbify line
199 // Assume original starting column of the string is indent, and the output
200 // starting column is outdent; this affects accounting of the tab-stops in the
201 // input string, and of the output string relative to the first character.
202 // Along the way, extra columns may be inserted or removed as per shift.
203 // If shift=0, indent=0, and outdent=0, this routine has the effect of harmonizing
204 // the output of white space according to the current tab setting ("clean indent").
205 // For now, we assume all unicode characters to be one column.
tabbify(const FXString & text,FXint tabcols,FXint indent,FXint outdent,FXint shift,FXbool tabs)206 FXString FXString::tabbify(const FXString& text,FXint tabcols,FXint indent,FXint outdent,FXint shift,FXbool tabs){
207 FXString result;
208 FXint oec=outdent+shift;
209 FXint osc=outdent;
210 FXint isc=indent;
211 FXint iec=indent;
212 FXint s=0;
213 FXint d=0;
214 FXint ntabs;
215 FXuchar c;
216 while(s<text.length()){
217 c=text[s++];
218 if(c==' '){ iec++; continue; } // Space is one column
219 if(c=='\t'){ iec+=tabcols-iec%tabcols; continue; } // Tabs is multiple columns
220 oec+=(iec-isc);
221 if(osc<oec){ // Owe some spaces
222 if(tabs && 2<(oec-osc)){
223 ntabs=oec/tabcols-osc/tabcols; // How many tabs to emit
224 if(ntabs){ d+=ntabs; osc=(oec/tabcols)*tabcols; }
225 }
226 d+=oec-osc;
227 osc=oec;
228 }
229 if(c=='\n'){ // Emit a newline and reset columns
230 d++;
231 isc=indent;
232 iec=indent;
233 osc=outdent;
234 oec=outdent+shift;
235 continue;
236 }
237 isc=++iec; // Advance input columns
238 osc=++oec; // Advance output columns
239 d++; // Copy character
240 if(c<0xC0) continue;
241 d++;
242 s++;
243 if(c<0xE0) continue;
244 d++;
245 s++;
246 if(c<0xF0) continue;
247 d++;
248 s++;
249 }
250 result.length(d);
251 oec=outdent+shift;
252 osc=outdent;
253 isc=indent;
254 iec=indent;
255 s=0;
256 d=0;
257 while(s<text.length()){
258 c=text[s++];
259 if(c==' '){ iec++; continue; } // Space is one column
260 if(c=='\t'){ iec+=tabcols-iec%tabcols; continue; } // Tabs is multiple columns
261 oec+=(iec-isc);
262 if(osc<oec){ // Owe some spaces
263 if(tabs && 2<(oec-osc)){
264 ntabs=oec/tabcols-osc/tabcols; // How many tabs to emit
265 if(ntabs){
266 do{ result[d++]='\t'; }while(--ntabs);
267 osc=(oec/tabcols)*tabcols; // Advance starting column to the last tabstop
268 }
269 }
270 while(osc<oec){ result[d++]=' '; osc++; } // Emit spaces to reach current column
271 }
272 if(c=='\n'){ // Emit a newline and reset columns
273 result[d++]='\n';
274 isc=indent;
275 iec=indent;
276 osc=outdent;
277 oec=outdent+shift;
278 continue;
279 }
280 isc=++iec; // Advance input columns
281 osc=++oec; // Advance output columns
282 result[d++]=c; // Copy character
283 if(c<0xC0) continue;
284 result[d++]=text[s++];
285 if(c<0xE0) continue;
286 result[d++]=text[s++];
287 if(c<0xF0) continue;
288 result[d++]=text[s++];
289 }
290 FXASSERT(d<=result.length());
291 result.trunc(d);
292 return result;
293 }
294
295
296 }
297