1 /********************************************************************************
2 *                                                                               *
3 *                  T a b - S t o p s   M a n i p u l a t i o n s                *
4 *                                                                               *
5 *********************************************************************************
6 * Copyright (C) 1997,2021 by Jeroen van der Zijp.   All Rights Reserved.        *
7 *********************************************************************************
8 * This library is free software; you can redistribute it and/or modify          *
9 * it under the terms of the GNU Lesser General Public License as published by   *
10 * the Free Software Foundation; either version 3 of the License, or             *
11 * (at your option) any later version.                                           *
12 *                                                                               *
13 * This library is distributed in the hope that it will be useful,               *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of                *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                 *
16 * GNU Lesser General Public License for more details.                           *
17 *                                                                               *
18 * You should have received a copy of the GNU Lesser General Public License      *
19 * along with this program.  If not, see <http://www.gnu.org/licenses/>          *
20 ********************************************************************************/
21 #include "xincs.h"
22 #include "fxver.h"
23 #include "fxdefs.h"
24 #include "fxmath.h"
25 #include "fxascii.h"
26 #include "fxunicode.h"
27 #include "FXArray.h"
28 #include "FXHash.h"
29 #include "FXStream.h"
30 #include "FXString.h"
31 
32 
33 /*
34   Notes:
35   - Expand tabs to spaces, or compress runs of spaces to tabs, given tab stops
36     and UTF8 encoding.
37   - Assume UTF8 characters account for 1 column.
38   - Complex shifting and tabbification of text.
39 */
40 
41 using namespace FX;
42 
43 /*******************************************************************************/
44 
45 namespace FX {
46 
47 // Expand tabs with the equivalent amount of spaces.
48 // UTF8 encoded characters are counted as one column.
detab(const FXString & text,FXint tabcols)49 FXString FXString::detab(const FXString& text,FXint tabcols){
50   FXString result;
51   FXint is,d,s;
52   FXuchar c;
53   is=d=s=0;
54   while(s<text.length()){
55     c=text[s++];
56     if(c=='\n'){
57       d++;
58       is=0;
59       continue;
60       }
61     if(c=='\t'){
62       do{ d++; }while(++is%tabcols);
63       continue;
64       }
65     d++;
66     is++;
67     if(c<0xC0) continue;
68     d++; s++;
69     if(c<0xE0) continue;
70     d++; s++;
71     if(c<0xF0) continue;
72     d++; s++;
73     }
74   result.length(d);
75   is=d=s=0;
76   while(s<text.length()){
77     c=text[s++];
78     if(c=='\n'){
79       result[d++]=c;
80       is=0;
81       continue;
82       }
83     if(c=='\t'){
84       do{ result[d++]=' '; }while(++is%tabcols);
85       continue;
86       }
87     result[d++]=c;
88     is++;
89     if(c<0xC0) continue;
90     result[d++]=text[s++];
91     if(c<0xE0) continue;
92     result[d++]=text[s++];
93     if(c<0xF0) continue;
94     result[d++]=text[s++];
95     }
96   FXASSERT(d==result.length());
97   return result;
98   }
99 
100 
101 // Compress runs of more than 2 spaces with tabs.
102 // UTF8 characters are counted as one column.
entab(const FXString & text,FXint tabcols)103 FXString FXString::entab(const FXString& text,FXint tabcols){
104   FXString result;
105   FXint is,ie,d,s,ts;
106   FXuchar c;
107   is=ie=d=s=0;
108   while(s<text.length()){
109     c=text[s];
110     s++;
111     d++;
112     ie++;
113     if(c==' '){                                 // Accumulate spaces
114       if((ie-is)<3) continue;                   // Run of less than 3
115       ts=is+tabcols-is%tabcols;
116       if(ie<ts) continue;                       // Not crossing tabstop
117       d+=1+is-ts;                               // Adjust
118       is=ts;                                    // Advance to tabstop
119       continue;
120       }
121     if(c=='\t'){                                // Keep the tab
122       d+=is-ie;                                 // Adjust
123       d++;
124       ie--;
125       ie+=tabcols-ie%tabcols;                   // New tab-column
126       is+=tabcols-is%tabcols;
127       if(is==ie) continue;                      // Reached tabstop
128       is+=tabcols-is%tabcols;
129       d++;
130       continue;
131       }
132     if(c=='\n'){                                // Reset columns
133       is=0;
134       ie=0;
135       continue;
136       }
137     is=ie;                                      // One UTF8 character
138     if(c<0xC0) continue;
139     d++;
140     s++;
141     if(c<0xE0) continue;
142     d++;
143     s++;
144     if(c<0xF0) continue;
145     d++;
146     s++;
147     }
148   result.length(d);
149   is=ie=d=s=0;
150   while(s<text.length()){
151     c=result[d]=text[s];
152     s++;
153     d++;
154     ie++;
155     if(c==' '){                                 // Accumulate spaces
156       if((ie-is)<3) continue;                   // Run of less than 3
157       ts=is+tabcols-is%tabcols;
158       if(ie<ts) continue;                       // Not crossing tabstop
159       result[d+is-ie]='\t';                     // Write a tab at start of run
160       d+=1+is-ts;                               // Adjust
161       is=ts;                                    // Advance to tabstop
162       continue;
163       }
164     if(c=='\t'){                                // Keep the tab
165       d+=is-ie;                                 // Adjust
166       result[d++]='\t';
167       ie--;
168       ie+=tabcols-ie%tabcols;
169       is+=tabcols-is%tabcols;
170       if(is==ie) continue;                      // Reached tabstop
171       is+=tabcols-is%tabcols;
172       result[d++]='\t';
173       continue;
174       }
175     if(c=='\n'){                                // Reset columns
176       is=0;
177       ie=0;
178       continue;
179       }
180     is=ie;                                      // One UTF8 character
181     if(c<0xC0) continue;
182     result[d]=text[s];
183     d++;
184     s++;
185     if(c<0xE0) continue;
186     result[d]=text[s];
187     d++;
188     s++;
189     if(c<0xF0) continue;
190     result[d]=text[s];
191     d++;
192     s++;
193     }
194   FXASSERT(d<=result.length());
195   return result;
196   }
197 
198 // Retabbify line
199 // Assume original starting column of the string is indent, and the output
200 // starting column is outdent; this affects accounting of the tab-stops in the
201 // input string, and of the output string relative to the first character.
202 // Along the way, extra columns may be inserted or removed as per shift.
203 // If shift=0, indent=0, and outdent=0, this routine has the effect of harmonizing
204 // the output of white space according to the current tab setting ("clean indent").
205 // For now, we assume all unicode characters to be one column.
tabbify(const FXString & text,FXint tabcols,FXint indent,FXint outdent,FXint shift,FXbool tabs)206 FXString FXString::tabbify(const FXString& text,FXint tabcols,FXint indent,FXint outdent,FXint shift,FXbool tabs){
207   FXString result;
208   FXint oec=outdent+shift;
209   FXint osc=outdent;
210   FXint isc=indent;
211   FXint iec=indent;
212   FXint s=0;
213   FXint d=0;
214   FXint ntabs;
215   FXuchar c;
216   while(s<text.length()){
217     c=text[s++];
218     if(c==' '){ iec++; continue; }                              // Space is one column
219     if(c=='\t'){ iec+=tabcols-iec%tabcols; continue; }          // Tabs is multiple columns
220     oec+=(iec-isc);
221     if(osc<oec){                                                // Owe some spaces
222       if(tabs && 2<(oec-osc)){
223         ntabs=oec/tabcols-osc/tabcols;                          // How many tabs to emit
224         if(ntabs){ d+=ntabs; osc=(oec/tabcols)*tabcols; }
225         }
226       d+=oec-osc;
227       osc=oec;
228       }
229     if(c=='\n'){                                                // Emit a newline and reset columns
230       d++;
231       isc=indent;
232       iec=indent;
233       osc=outdent;
234       oec=outdent+shift;
235       continue;
236       }
237     isc=++iec;                                                  // Advance input columns
238     osc=++oec;                                                  // Advance output columns
239     d++;                                                        // Copy character
240     if(c<0xC0) continue;
241     d++;
242     s++;
243     if(c<0xE0) continue;
244     d++;
245     s++;
246     if(c<0xF0) continue;
247     d++;
248     s++;
249     }
250   result.length(d);
251   oec=outdent+shift;
252   osc=outdent;
253   isc=indent;
254   iec=indent;
255   s=0;
256   d=0;
257   while(s<text.length()){
258     c=text[s++];
259     if(c==' '){ iec++; continue; }                              // Space is one column
260     if(c=='\t'){ iec+=tabcols-iec%tabcols; continue; }          // Tabs is multiple columns
261     oec+=(iec-isc);
262     if(osc<oec){                                                // Owe some spaces
263       if(tabs && 2<(oec-osc)){
264         ntabs=oec/tabcols-osc/tabcols;                          // How many tabs to emit
265         if(ntabs){
266           do{ result[d++]='\t'; }while(--ntabs);
267           osc=(oec/tabcols)*tabcols;                            // Advance starting column to the last tabstop
268           }
269         }
270       while(osc<oec){ result[d++]=' '; osc++; }                 // Emit spaces to reach current column
271       }
272     if(c=='\n'){                                                // Emit a newline and reset columns
273       result[d++]='\n';
274       isc=indent;
275       iec=indent;
276       osc=outdent;
277       oec=outdent+shift;
278       continue;
279       }
280     isc=++iec;                                                  // Advance input columns
281     osc=++oec;                                                  // Advance output columns
282     result[d++]=c;                                              // Copy character
283     if(c<0xC0) continue;
284     result[d++]=text[s++];
285     if(c<0xE0) continue;
286     result[d++]=text[s++];
287     if(c<0xF0) continue;
288     result[d++]=text[s++];
289     }
290   FXASSERT(d<=result.length());
291   result.trunc(d);
292   return result;
293   }
294 
295 
296 }
297