1 #include <string>
2 #include <vector>
3 #include <algorithm>
4 #include <cwctype>
5 #include <cctype>
6 #include "sedit.h"
7
8 /*
9
10 copyright (c) 2006 squell <squell@alumina.nl>
11
12 use, modification, copying and distribution of this software is permitted
13 under the conditions described in the file 'COPYING'.
14
15 */
16
17 using namespace std;
18 using namespace charset;
19
/*
  Character classification and case-mapping shims.

  <wctype.h> was introduced by the 1994 Amendment to ISO C (signalled by
  __STDC_VERSION__ >= 199409L) and is therefore part of standard C++.
  Support still varies between toolchains (a candidate for an autoconf
  check), so the compilers singled out below get the narrow <ctype.h>
  routines instead of the wide ones.

    to_upper(c), to_lower(c)  - case mapping
    is_(what, c)              - expands to isw<what>(c) or is<what>(c),
                                e.g. is_(space, c), is_(digit, c)
*/

#if !((defined(__FreeBSD__) && (__FreeBSD__ < 5)) \
      || defined(__DJGPP__) || defined(__BORLANDC__))
#  define is_(what, c) isw##what(c)
#  define to_lower     towlower
#  define to_upper     towupper
#else
#  define is_(what, c) is##what(c)
#  define to_lower     tolower
#  define to_upper     toupper
#endif
35
36 namespace {
37
// Case/spacing treatment chosen by a modifier character in format::code:
//   as_is - no case or spacing conversion (the initial setting)
//   name  - '+' : capitalize()
//   lowr  - '-' : convert to lower case
//   split - '*' : padcamels()
enum style { as_is, name, lowr, split };
39
40 struct filtered_char { // filter low-ascii
operator ()__anon9e48b1cf0111::filtered_char41 bool operator()(wchar_t c)
42 { return c == '_' || is_(cntrl, c); }
43 };
44
45 struct both_space { // filter ascii space
operator ()__anon9e48b1cf0111::both_space46 bool operator()(wchar_t a, wchar_t b)
47 { return is_(space, a) && is_(space, b); }
48 };
49
50 struct char_to_lower {
operator ()__anon9e48b1cf0111::char_to_lower51 wchar_t operator()(wchar_t c)
52 { return to_lower(c); }
53 };
54
55 // compress(" bla bla ") -> "bla bla"
56
compress(wstring & s)57 void compress(wstring& s)
58 {
59 wstring::iterator p = unique(s.begin(), s.end(), both_space());
60 if(p != s.begin() && is_(space, p[-1])) --p;
61 s.erase(p, s.end());
62 if(s.length() > 0 && is_(space, s[0]))
63 s.erase(s.begin());
64 }
65
66 // noleadzero("(0300/0004)=0075") -> "300/4=75"
67
noleadzero(wstring & s)68 void noleadzero(wstring& s)
69 {
70 const wchar_t zero[] = L"0";
71 wstring::size_type p, q = 0;
72 do {
73 p = s.find_first_of(zero, q);
74 q = s.find_first_not_of(zero, p);
75 if(q == p)
76 return;
77 if(p == 0 || !is_(digit, s[p-1])) {
78 s.erase(p, q-p);
79 if(s.empty() || !is_(digit,s[p]))
80 s.insert(p, zero);
81 q = p+1;
82 }
83 } while(1);
84 }
85
86 // capitalize("hElLo wOrLd") -> "Hello World"
87
capitalize(wstring & s)88 void capitalize(wstring& s)
89 {
90 bool new_w = true;
91 for(wstring::iterator p = s.begin(); p != s.end(); ++p) {
92 *p = new_w? to_upper(*p):to_lower(*p);
93 new_w = is_(space, *p) || !is_(alnum, *p) && new_w;
94 }
95 }
96
97 // padcamels("ReformatAStringLikeThis") -> "Reformat A String Like This"
98
padcamels(wstring & s)99 void padcamels(wstring& s)
100 {
101 wstring::const_iterator p;
102 bool word = false;
103 wstring r;
104 for(p = s.begin(); p != s.end(); r.push_back(*p++)) {
105 if(is_(upper, *p) && word)
106 r.push_back(' ');
107 word = !is_(space, *p);
108 }
109 s.swap(r);
110 }
111
112
// padnumeric("(300/4)=75", 4) -> "(0300/0004)=0075"
//
// Left-pads every run of decimal digits in s with '0' until it is at
// least `pad` characters wide. Longer runs and all other characters are
// left unchanged; pad == 0 leaves the string as it is.

void padnumeric(wstring& s, unsigned pad)
{
    const wchar_t digits[] = L"0123456789";
    wstring::size_type from = 0;

    for(;;) {
        const wstring::size_type first = s.find_first_of(digits, from);
        if(first == wstring::npos)        // no further number
            return;

        wstring::size_type stop = s.find_first_not_of(digits, first);
        if(stop == wstring::npos)         // number runs to the end of s
            stop = s.length();

        wstring::size_type width = stop - first;
        if(width < pad) {
            s.insert(first, pad - width, '0');
            width = pad;
        }
        from = first + width;             // continue behind this number
    }
}
132
// Reads up to `digits` hexadecimal digits starting at p (never going past
// q) and returns the value they spell; p is left on the first character
// that was not consumed. Returns 0 when no hex digit is present.
// Used by format::edit for the \uXXXX (4) and \UXXXXXXXX (8) escapes.
//
// Only the ASCII characters 0-9, a-f and A-F are accepted. They are
// tested by explicit range rather than through the ctype macros, so the
// result does not depend on the locale or on how the C library classifies
// non-ASCII digits. The accumulator is unsigned so that eight digits
// cannot overflow a signed 32-bit long.

wchar_t codepoint(wstring::const_iterator& p, wstring::const_iterator const q, int digits)
{
    unsigned long val = 0;
    for( ; digits-- && p < q; ++p) {
        const wchar_t c = *p;
        unsigned long nibble;
        if(c >= '0' && c <= '9')      nibble = c - '0';
        else if(c >= 'A' && c <= 'F') nibble = c - 'A' + 10;
        else if(c >= 'a' && c <= 'f') nibble = c - 'a' + 10;
        else break;
        val = val << 4 | nibble;
    }
    return static_cast<wchar_t>(val);
}
145
146 } // end of anon. namespace
147
148 namespace stredit {
149
edit(const wstring & format,bool atomic) const150 function::result format::edit(const wstring& format, bool atomic) const
151 {
152 conv<wchar_t> build;
153 build.reserve(format.length());
154 int validity = -true;
155
156 for(ptr p = format.begin(); p < format.end(); ) {
157 switch(wchar_t c = *p++) {
158 case '\\': // leaves trailing slashes
159 if(p != format.end()) switch(c = *p++) {
160 case '\\': c = '\\'; break;
161 case 'a': c = '\a'; break;
162 case 'b': c = '\b'; break;
163 case 'f': c = '\f'; break;
164 case 'n': c = '\n'; break; /* ?append carriage returns? */
165 case 'r': c = '\r'; break;
166 case 't': c = '\t'; break;
167 case 'v': c = '\v'; break;
168 case 'u': c = codepoint(p, format.end(), 4); break;
169 case 'U': c = codepoint(p, format.end(), 8); break;
170 }
171 default:
172 build += c;
173 break;
174 case prefix:
175 result subst = code(--p, format.end());
176 build += subst;
177 if(!subst) {
178 validity = validity > false;
179 continue;
180 }
181 }
182 if(!atomic) validity = true;
183 }
184 return result(build, validity);
185 }
186
code(ptr & p,ptr end) const187 function::result format::code(ptr& p, ptr end) const
188 {
189 vector<wstring> alt;
190 style caps = as_is;
191 bool raw = false;
192 unsigned num_pad = 0;
193
194 for(++p; p != end; ) {
195 switch(wchar_t c = *p++) {
196 case '_': raw = true; continue;
197 case '+': caps = name; continue;
198 case '-': caps = lowr; continue;
199 case '*': caps = split; continue;
200 case '#': ++num_pad; continue;
201 case prefix:
202 return conv<wchar_t>(1, prefix);
203
204 case '|': {
205 ptr q = matching(p-1, end);
206 if(alt.size() < 25) // artificial limit
207 alt.push_back(wstring(p, q));
208 if(q == end) break;
209 p = ++q;
210 continue;
211 }
212
213 default :
214 result subst = var(--p, end);
215 if(!subst.good()) for(unsigned i = 0; i < alt.size(); ++i)
216 if(result tmp = edit(alt[i], true)) {
217 subst = tmp;
218 break;
219 }
220 wstring s = conv<wchar_t>(subst);
221 if(!raw) { // remove gunk
222 replace_if(s.begin(), s.end(), filtered_char(), ' ');
223 compress(s);
224 }
225 if(caps == split)
226 padcamels(s);
227 if(caps == name)
228 capitalize(s);
229 if(caps == lowr)
230 transform(s.begin(), s.end(), s.begin(), char_to_lower());
231 if(num_pad > 0)
232 noleadzero(s);
233 padnumeric(s, num_pad);
234 return result(conv<wchar_t>(s), subst.good());
235 }
236 break;
237 }
238 return false;
239 }
240
matching(ptr p,ptr end) const241 format::ptr format::matching(ptr p, ptr end) const
242 {
243 unsigned nesting = 1, one = 1;
244 wchar_t delim = *p++;
245 while(p != end) {
246 switch(wchar_t c = *p++) {
247 case prefix: // start of nesting?
248 one = -one; break;
249 case '\\': // ignore escaped char
250 if(p == end) break;
251 ++p;
252 default :
253 if(c == delim && (nesting-=one) == 0) return --p;
254 one = 1;
255 }
256 }
257 return p;
258 }
259
260 } // end namespace
261
262