1 /***************************************************************************/
2 /* This code is part of WWW grabber called pavuk */
3 /* Copyright (c) 1997 - 2001 Stefan Ondrejicka */
4 /* Distributed under GPL 2 or later */
5 /***************************************************************************/
6
7 #include "config.h"
8
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12
13 #include "tools.h"
14 #include "tr.h"
15
/*
 * Numeric value of the hexadecimal digit character x.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects.  No validation is done: a character that is not a hex digit
 * yields a meaningless value.
 */
#define HEXASC2HEXNR(x) \
  (((x) < '0' || (x) > '9') ? (tl_ascii_tolower(x) - 'a' + 10) : ((x) - '0'))
18
/*
 * Character classes that may appear in a translation set.
 * TR_BADCLS is not a real class; it marks "no class recognised".
 */
typedef enum
{
  TR_ALPHA = 0,                 /* [:alpha:]  */
  TR_ALNUM,                     /* [:alnum:]  */
  TR_NUM,                       /* [:digit:]  */
  TR_XNUM,                      /* [:xdigit:] */
  TR_SPACE,                     /* [:space:]  */
  TR_BLANK,                     /* [:blank:]  */
  TR_CTRL,                      /* [:cntrl:]  */
  TR_PRINTABLE,                 /* [:print:]  */
  TR_UPPER,                     /* [:upper:]  */
  TR_LOWER,                     /* [:lower:]  */
  TR_PUNCT,                     /* [:punct:]  */
  TR_GRAPH,                     /* [:graph:]  */
  TR_NONPRINTABLE,              /* [:nprint:] */
  TR_BADCLS                     /* sentinel, keep last */
} tr_cls;
36
37 typedef struct
38 {
39 char *name;
40 tr_cls cls;
41 } tr_cls_str;
42
43 static const tr_cls_str tr_cls_map[] = {
44 {"[:upper:]", TR_UPPER},
45 {"[:lower:]", TR_LOWER},
46 {"[:alpha:]", TR_ALPHA},
47 {"[:alnum:]", TR_ALNUM},
48 {"[:digit:]", TR_NUM},
49 {"[:xdigit:]", TR_XNUM},
50 {"[:space:]", TR_SPACE},
51 {"[:blank:]", TR_BLANK},
52 {"[:cntrl:]", TR_CTRL},
53 {"[:print:]", TR_PRINTABLE},
54 {"[:nprint:]", TR_NONPRINTABLE},
55 {"[:punct:]", TR_PUNCT},
56 {"[:graph:]", TR_GRAPH}
57 };
58
/*
 * Tell whether character c is a member of character class cls.
 *
 * Returns the result of the matching tl_ascii_is*() test (negated for
 * TR_NONPRINTABLE), or FALSE when cls is not one of the known classes.
 */
static int tr_is_cls(char c, tr_cls cls)
{
  int member = FALSE;

  switch (cls)
  {
  case TR_ALPHA:
    member = tl_ascii_isalpha(c);
    break;
  case TR_ALNUM:
    member = tl_ascii_isalnum(c);
    break;
  case TR_NUM:
    member = tl_ascii_isdigit(c);
    break;
  case TR_XNUM:
    member = tl_ascii_isxdigit(c);
    break;
  case TR_SPACE:
    member = tl_ascii_isspace(c);
    break;
  case TR_BLANK:
    member = tl_ascii_isblank(c);
    break;
  case TR_CTRL:
    member = tl_ascii_iscntrl(c);
    break;
  case TR_PRINTABLE:
    member = tl_ascii_isprint(c);
    break;
  case TR_UPPER:
    member = tl_ascii_isupper(c);
    break;
  case TR_LOWER:
    member = tl_ascii_islower(c);
    break;
  case TR_PUNCT:
    member = tl_ascii_ispunct(c);
    break;
  case TR_GRAPH:
    member = tl_ascii_isgraph(c);
    break;
  case TR_NONPRINTABLE:
    member = !tl_ascii_isprint(c);
    break;
  default:
    /* TR_BADCLS or anything unknown matches nothing */
    break;
  }

  return member;
}
106
/*
 * Write every member of character class cls to p, in ascending character
 * code order, and NUL-terminate the result.
 *
 * Character code 0 is never emitted: the expanded set is handled as a C
 * string, so a NUL member would cut the set short at that position (this
 * used to empty classes such as [:cntrl:] and [:nprint:]).
 *
 * p must have room for at least 256 bytes (255 members + terminator).
 *
 * Returns the number of characters written, not counting the terminator.
 */
static unsigned int tr_append_cls(char *p, tr_cls cls)
{
  unsigned int code;
  unsigned int apnd = 0;

  for(code = 1; code <= 255; code++)
  {
    if(tr_is_cls((char) code, cls))
      p[apnd++] = (char) code;
  }

  /* terminate even when the class turned out to be empty */
  p[apnd] = '\0';

  return apnd;
}
124
/* Value (0-15) of the ASCII hexadecimal digit c, or -1 if c is not one. */
static int tr_hex_digit_value(char c)
{
  if(c >= '0' && c <= '9')
    return c - '0';
  if(c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  if(c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  return -1;
}

/*
 * Decode the escape sequence that starts at **p (which must be a
 * backslash) and return the character it stands for.
 *
 * Recognised forms:
 *   \n \r \t   newline, carriage return, tab
 *   \0xHH      the character with hexadecimal code HH
 *   \c         any other character c stands for itself
 *
 * On return *p points at the LAST character consumed, so the caller's own
 * increment moves on to the next unread character.  When nothing can be
 * decoded (an incomplete \0x.. sequence, or a backslash that is the last
 * character of the string) the backslash itself is returned and *p is left
 * untouched; in particular *p is never moved onto or past the terminating
 * NUL.
 */
static unsigned char tr_get_escaped_str(char **p)
{
  char *ps = *p;
  int hi, lo;

  switch (ps[1])
  {
  case '\0':
    /* trailing backslash: nothing to escape, keep it literally */
    return (unsigned char) ps[0];
  case 'n':
    (*p)++;
    return '\n';
  case 'r':
    (*p)++;
    return '\r';
  case 't':
    (*p)++;
    return '\t';
  case '0':
    if(ps[2] == 'x')
    {
      hi = tr_hex_digit_value(ps[3]);
      /* only look at ps[4] once ps[3] is known not to be the terminator */
      lo = (hi >= 0) ? tr_hex_digit_value(ps[4]) : -1;
      if(lo >= 0)
      {
        (*p) += 4;
        return (unsigned char) ((hi << 4) + lo);
      }
    }
    /* not a complete \0xHH sequence: the backslash is taken literally */
    return (unsigned char) ps[0];
  default:
    (*p)++;
    return (unsigned char) ps[1];
  }
}
166
tr_expand_str(char * str,int * rlen)167 static char *tr_expand_str(char *str, int *rlen)
168 {
169 char *p;
170 char pom[4096];
171 int i;
172
173 pom[0] = '\0';
174
175 for(i = 0, p = str; *p; p++)
176 {
177 switch (*p)
178 {
179 case '\\':
180 pom[i] = tr_get_escaped_str(&p);
181 pom[i + 1] = '\0';
182 i++;
183 break;
184 case '[':
185 {
186 int j;
187 tr_cls cls = TR_BADCLS;
188
189 for(j = 0; j < TR_BADCLS; j++)
190 {
191 if(!strncmp(p, tr_cls_map[j].name, strlen(tr_cls_map[j].name)))
192 {
193 cls = tr_cls_map[j].cls;
194 p += strlen(tr_cls_map[j].name) - 1;
195 break;
196 }
197 }
198 if(cls != TR_BADCLS)
199 i += tr_append_cls((pom + i), cls);
200 else
201 {
202 pom[i] = *p;
203 pom[i + 1] = '\0';
204 i++;
205 }
206 }
207 break;
208 case '-':
209 {
210 char strtc;
211 char endc;
212 int pc;
213
214 if(i)
215 strtc = pom[i - 1] + 1;
216 else
217 strtc = '\0';
218
219 if(*(p + 1))
220 {
221 p++;
222 if(*p == '\\')
223 {
224 endc = tr_get_escaped_str(&p);
225 }
226 else
227 endc = *p;
228 }
229 else
230 endc = '\255';
231
232 for(pc = strtc; pc <= endc; pc++)
233 {
234 pom[i] = pc;
235 i++;
236 }
237 }
238 break;
239 default:
240 pom[i] = *p;
241 pom[i + 1] = '\0';
242 i++;
243 }
244 }
245
246 if(rlen)
247 *rlen = i;
248
249 return (tl_strdup(pom));
250 }
251
/*
 * Translate characters: return a copy of str in which every character
 * found in fset is replaced by the character at the same position in tset.
 *
 * If fset is longer than tset, the surplus characters of fset all map to
 * the last character of tset.  When a character occurs several times in
 * fset its first occurrence decides.  An empty tset offers nothing to
 * translate to, so str is copied unchanged (this case used to read
 * tset[-1]).
 *
 * fset  expanded set of characters to replace
 * tset  expanded set of replacement characters
 * str   input string; not modified
 *
 * Returns a copy obtained from tl_strdup(); tr() releases it with _free().
 */
char *tr_chr_chr(char *fset, char *tset, char *str)
{
  size_t tsetlen = strlen(tset);
  char *retv = tl_strdup(str);
  char *d;

  if(!tsetlen)
    return retv;

  for(d = retv; *d; d++)
  {
    char *hit = strchr(fset, *d);

    if(hit)
    {
      size_t idx = (size_t) (hit - fset);

      *d = tset[(idx < tsetlen) ? idx : (tsetlen - 1)];
    }
  }

  return retv;
}
273
/*
 * Delete characters: return a copy of str without any character that
 * occurs in set.
 *
 * The result is terminated after the last kept character in every case,
 * including when nothing at all is kept (previously the untouched copy of
 * str was returned then).
 *
 * set  expanded set of characters to delete
 * str  input string; not modified
 *
 * Returns a copy obtained from tl_strdup(); tr() releases it with _free().
 */
char *tr_del_chr(char *set, char *str)
{
  char *retv = tl_strdup(str);
  char *p, *d;

  for(p = str, d = retv; *p; p++)
  {
    if(!strchr(set, *p))
      *d++ = *p;
  }
  *d = '\0';

  return retv;
}
302
/*
 * Replace substrings: return a newly allocated copy of str in which every
 * non-overlapping occurrence of s1 (searched left to right) is replaced
 * by s2.
 *
 * s1   string to look for; if it is empty nothing is replaced and a plain
 *      copy of str is returned (an empty pattern used to loop forever)
 * s2   replacement; NULL is treated like the empty string, i.e. the
 *      occurrences of s1 are removed
 * str  input string; not modified
 *
 * Returns memory obtained from malloc() that the caller must free, or
 * NULL if the allocation fails.
 */
char *tr_str_str(char *s1, char *s2, char *str)
{
  size_t s1len = strlen(s1);
  size_t s2len = s2 ? strlen(s2) : 0;
  size_t hits = 0;
  char *p, *hit, *retv, *d;

  /* first pass: count the occurrences to size the result exactly */
  if(s1len)
  {
    for(p = str; (hit = strstr(p, s1)); p = hit + s1len)
      hits++;
  }

  retv = malloc(strlen(str) - hits * s1len + hits * s2len + 1);
  if(!retv)
    return NULL;

  /* second pass: copy the text between occurrences, substituting s2 */
  d = retv;
  p = str;
  if(s1len)
  {
    while((hit = strstr(p, s1)))
    {
      size_t gap = (size_t) (hit - p);

      memcpy(d, p, gap);
      d += gap;
      if(s2len)
      {
        memcpy(d, s2, s2len);
        d += s2len;
      }
      p = hit + s1len;
    }
  }

  /* remainder after the last occurrence, including the terminator */
  strcpy(d, p);

  return retv;
}
341
tr(char * str)342 char *tr(char *str)
343 {
344 char *p1, *p2;
345 char *s1, *s2;
346
347 if(priv_cfg.tr_str_s1 && priv_cfg.tr_str_s2)
348 {
349 p1 = tr_str_str(priv_cfg.tr_str_s1, priv_cfg.tr_str_s2, str);
350 }
351 else
352 {
353 p1 = tl_strdup(str);
354 }
355
356 if(priv_cfg.tr_del_chr)
357 {
358 s1 = tr_expand_str(priv_cfg.tr_del_chr, NULL);
359 p2 = tr_del_chr(s1, p1);
360 _free(s1);
361 _free(p1);
362 }
363 else
364 {
365 p2 = p1;
366 }
367
368 if(priv_cfg.tr_chr_s1 && priv_cfg.tr_chr_s2)
369 {
370 s1 = tr_expand_str(priv_cfg.tr_chr_s1, NULL);
371 s2 = tr_expand_str(priv_cfg.tr_chr_s2, NULL);
372 p1 = tr_chr_chr(s1, s2, p2);
373 _free(s1);
374 _free(s2);
375 _free(p2);
376 }
377 else
378 {
379 p1 = p2;
380 }
381
382 return p1;
383 }
384