1 #include <string>
2 #include <vector>
3 #include <algorithm>
4 #include <cwctype>
5 #include <cctype>
6 #include "sedit.h"
7 
8 /*
9 
10   copyright (c) 2006 squell <squell@alumina.nl>
11 
12   use, modification, copying and distribution of this software is permitted
13   under the conditions described in the file 'COPYING'.
14 
15 */
16 
17 using namespace std;
18 using namespace charset;
19 
20  /*
21    <wctype.h> was added in the 1994 Amd. to C; __STDC_VERSION__ >= 199409L
22    It is therefore part of the C++ standard, but support varies; autoconf?
23  */
24 
// Character-handling shims used throughout this file.
//
//   to_upper(c), to_lower(c)  - case conversion
//   is_(what, c)              - classification; pastes `what` onto the
//                               function name, e.g. is_(space, c) expands
//                               to isspace(c) or iswspace(c)
//
// FreeBSD before 5, DJGPP and Borland C++ get the narrow <cctype> routines;
// every other target gets the wide <cwctype> ones.
#if defined(__FreeBSD__) && (__FreeBSD__ < 5) \
 || defined(__DJGPP__) || defined(__BORLANDC__)
#    define to_upper toupper
#    define to_lower tolower
#    define is_(what, c) is##what(c)
#else
#    define to_upper towupper
#    define to_lower towlower
#    define is_(what, c) isw##what(c)
#endif
35 
36 namespace {
37 
// Case/spacing treatment selected by a flag character in format::code():
//   as_is - initial value, no case conversion is applied
//   name  - set by '+', text goes through capitalize()
//   lowr  - set by '-', every character is lower-cased
//   split - set by '*', text goes through padcamels()
enum style { as_is, name, lowr, split };
39 
40 struct filtered_char {                           // filter low-ascii
operator ()__anon9e48b1cf0111::filtered_char41     bool operator()(wchar_t c)
42     { return c == '_' || is_(cntrl, c); }
43 };
44 
45 struct both_space {                              // filter ascii space
operator ()__anon9e48b1cf0111::both_space46     bool operator()(wchar_t a, wchar_t b)
47     { return is_(space, a) && is_(space, b); }
48 };
49 
50 struct char_to_lower {
operator ()__anon9e48b1cf0111::char_to_lower51     wchar_t operator()(wchar_t c)
52     { return to_lower(c); }
53 };
54 
55  // compress("    bla    bla  ") -> "bla bla"
56 
compress(wstring & s)57 void compress(wstring& s)
58 {
59     wstring::iterator p = unique(s.begin(), s.end(), both_space());
60     if(p != s.begin() && is_(space, p[-1])) --p;
61     s.erase(p, s.end());
62     if(s.length() > 0 && is_(space, s[0]))
63         s.erase(s.begin());
64 }
65 
 // noleadzero("(0300/0004)=0075") -> "(300/4)=75"
67 
noleadzero(wstring & s)68 void noleadzero(wstring& s)
69 {
70     const wchar_t zero[] = L"0";
71     wstring::size_type p, q = 0;
72     do {
73         p = s.find_first_of(zero, q);
74         q = s.find_first_not_of(zero, p);
75         if(q == p)
76             return;
77         if(p == 0 || !is_(digit, s[p-1])) {
78             s.erase(p, q-p);
79             if(s.empty() || !is_(digit,s[p]))
80                 s.insert(p, zero);
81             q = p+1;
82         }
83     } while(1);
84 }
85 
86  // capitalize("hElLo wOrLd") -> "Hello World"
87 
capitalize(wstring & s)88 void capitalize(wstring& s)
89 {
90     bool new_w = true;
91     for(wstring::iterator p = s.begin(); p != s.end(); ++p) {
92         *p = new_w? to_upper(*p):to_lower(*p);
93         new_w = is_(space, *p) || !is_(alnum, *p) && new_w;
94     }
95 }
96 
97  // padcamels("ReformatAStringLikeThis") -> "Reformat A String Like This"
98 
padcamels(wstring & s)99 void padcamels(wstring& s)
100 {
101     wstring::const_iterator p;
102     bool word = false;
103     wstring r;
104     for(p = s.begin(); p != s.end(); r.push_back(*p++)) {
105         if(is_(upper, *p) && word)
106             r.push_back(' ');
107         word = !is_(space, *p);
108     }
109     s.swap(r);
110 }
111 
112 
 // padnumeric("(300/4)=75", 4) -> "(0300/0004)=0075"
114 
// Left-pad every run of decimal digits in s with '0' until it is at least
// `pad` characters wide.  Runs that are already wide enough, and all
// non-digit text, are left untouched.
void padnumeric(wstring& s, unsigned pad)
{
    const wchar_t digits[] = L"0123456789";
    wstring::size_type from = 0;
    for(;;) {
        const wstring::size_type start = s.find_first_of(digits, from);
        if(start == wstring::npos)              // no further numbers
            return;

        wstring::size_type stop = s.find_first_not_of(digits, start);
        if(stop == wstring::npos)               // number runs to the end
            stop = s.length();

        wstring::size_type width = stop - start;
        if(width < pad) {
            s.insert(start, pad - width, L'0');
            width = pad;
        }
        from = start + width;                   // resume after this number
    }
}
132 
codepoint(wstring::const_iterator & p,wstring::const_iterator const q,int digits)133 wchar_t codepoint(wstring::const_iterator& p, wstring::const_iterator const q, int digits)
134 {
135     long val = 0;
136     for( ; digits-- && p < q; ++p) {
137         wchar_t c = to_upper(*p);
138         if(is_(digit, c)) c -= '0';
139         else if(c >= 'A' && c <= 'F') c -= 'A'-10;
140         else break;
141         val = val << 4 | (c&0xF);
142     }
143     return val;
144 }
145 
146 } // end of anon. namespace
147 
148 namespace stredit {
149 
// Expand a format string into its final text.
//
// Walks `format` once: a backslash introduces an escape sequence, `prefix`
// introduces a substitution that is delegated to code(), and every other
// character is copied unchanged.
//
//  format  the text to expand
//  atomic  when false, any literal character or successful substitution
//          sets the validity flag to true; when true the flag is never
//          raised here, only lowered by a failed substitution
//
// Returns result(build, validity): the expanded text plus the flag.
function::result format::edit(const wstring& format, bool atomic) const
{
    conv<wchar_t> build;
    build.reserve(format.length());
    // tri-state: -1 (== -true) nothing decided yet, 0 a substitution failed,
    // 1 set by the non-atomic rule at the bottom of the loop
    // NOTE(review): how result interprets -1 is defined outside this file -
    // presumably any non-zero value counts as valid; confirm in sedit.h
    int validity = -true;

    for(ptr p = format.begin(); p < format.end(); ) {
        switch(wchar_t c = *p++) {
        case '\\':                              // leaves trailing slashes
            // decode the escape; an unlisted letter stands for itself
            if(p != format.end()) switch(c = *p++) {
            case '\\': c = '\\'; break;
            case 'a':  c = '\a'; break;
            case 'b':  c = '\b'; break;
            case 'f':  c = '\f'; break;
            case 'n':  c = '\n'; break;    /* ?append carriage returns? */
            case 'r':  c = '\r'; break;
            case 't':  c = '\t'; break;
            case 'v':  c = '\v'; break;
            case 'u':  c = codepoint(p, format.end(), 4); break;
            case 'U':  c = codepoint(p, format.end(), 8); break;
            }
            // deliberate fall through: emit the decoded character
        default:
            build += c;
            break;
        case prefix:
            // step back so code() starts on the prefix character itself;
            // code() takes p by reference and advances it
            result subst = code(--p, format.end());
            build += subst;
            if(!subst) {
                // -1 and 0 become 0, 1 stays 1; `continue` skips the
                // non-atomic rule below
                validity = validity > false;
                continue;
            }
        }
        if(!atomic) validity = true;
    }
    return result(build, validity);
}
186 
// Parse and evaluate one substitution.
//
//  p    in/out: on entry points at the prefix character; advanced as the
//       substitution is consumed
//  end  end of the format text
//
// After the prefix, any number of flag characters may appear:
//   '_'  raw: skip the clean-up of '_' and control characters
//   '+'  capitalize()      '-'  lower-case      '*'  padcamels()
//   '#'  each occurrence raises the numeric pad width by one
//   '|'  starts an alternative text running up to the matching '|'
// A second prefix character yields the prefix itself.  Any other character
// is handed to var(); if that result is not good, the stored alternatives
// are tried in order.
//
// Returns the post-processed text together with subst.good(), or a result
// built from `false` when the text ends first or an alternative is not
// terminated.
function::result format::code(ptr& p, ptr end) const
{
    vector<wstring> alt;
    style caps       = as_is;
    bool raw         = false;
    unsigned num_pad = 0;

    for(++p; p != end; ) {                      // ++p: skip the prefix
        switch(wchar_t c = *p++) {
        case '_': raw  = true;      continue;
        case '+': caps = name;      continue;
        case '-': caps = lowr;      continue;
        case '*': caps = split;     continue;
        case '#': ++num_pad;        continue;
        case prefix:
            // NOTE(review): conv<wchar_t>(1, prefix) presumably builds a
            // one-character string, like basic_string(n, ch) - confirm
            return conv<wchar_t>(1, prefix);

        case '|': {
            // p-1 is the '|' just read; q is its closing partner or end
            ptr q = matching(p-1, end);
            if(alt.size() < 25)                 // artificial limit
                alt.push_back(wstring(p, q));
            // unterminated: leave the switch, then the loop (break below)
            if(q == end) break;
            p = ++q;                            // resume after closing '|'
            continue;
          }

        default :
            // step back so var() sees the character just read
            result subst = var(--p, end);
            // fall back to the first alternative that expands successfully
            if(!subst.good()) for(unsigned i = 0; i < alt.size(); ++i)
                if(result tmp = edit(alt[i], true)) {
                    subst = tmp;
                    break;
                }
            wstring s = conv<wchar_t>(subst);
            if(!raw) {                          // remove gunk
                replace_if(s.begin(), s.end(), filtered_char(), ' ');
                compress(s);
            }
            if(caps == split)
                padcamels(s);
            if(caps == name)
                capitalize(s);
            if(caps == lowr)
                transform(s.begin(), s.end(), s.begin(), char_to_lower());
            if(num_pad > 0)
                noleadzero(s);
            // with num_pad == 0 this inserts nothing
            padnumeric(s, num_pad);
            return result(conv<wchar_t>(s), subst.good());
        }
        break;                                  // only reached from case '|'
    }
    return false;
}
240 
// Find the delimiter that closes the one p points at.
//
//  p    points at the opening delimiter; that character is the delimiter
//       searched for
//  end  end of the text
//
// A delimiter directly preceded by `prefix` opens a nested level instead of
// closing one, and a backslash hides the character after it.
//
// Returns an iterator to the closing delimiter, or the end of the scan when
// none was found.
format::ptr format::matching(ptr p, ptr end) const
{
    // `one` is what gets subtracted from `nesting` at a delimiter: 1
    // normally, the unsigned wrap of -1 right after a prefix - so that the
    // subtraction actually adds a level
    unsigned nesting = 1, one = 1;
    wchar_t delim = *p++;
    while(p != end) {
        switch(wchar_t c = *p++) {
        case prefix:                            // start of nesting?
            // unsigned negation; a second prefix in a row flips it back
            one = -one; break;
        case '\\':                              // ignore escaped char
            if(p == end) break;                 // trailing backslash
            ++p;
            // fall through with c still '\\', which resets `one`
        default  :
            // nesting is only touched when c is the delimiter
            if(c == delim && (nesting-=one) == 0) return --p;
            one = 1;
        }
    }
    return p;
}
259 
260 } // end namespace
261 
262