1 /*
2 *
3 * kkstrtext string related and text processing routines
4 * $Id: kkstrtext.cc,v 1.45 2005/02/01 00:13:24 konst Exp $
5 *
6 * Copyright (C) 1999-2004 by Konstantin Klyagin <k@thekonst.net>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
21 * USA
22 *
23 */
24 
25 #include "kkstrtext.h"
26 #include <errno.h>
27 
28 #ifdef HAVE_STDINT_H
29 #include <stdint.h> /* for intptr_t */
30 #endif
31 
/*
 * Removes up to count characters from strin, starting at index frompos,
 * by shifting the rest of the string (terminator included) to the left.
 *
 * count is clamped to the number of characters available after frompos.
 * The string is left untouched when strin is NULL, count is not positive
 * or frompos lies outside the string.
 *
 * Returns strin.
 */
char *strcut(char *strin, int frompos, int count) {
    size_t len, from, cut;

    if(!strin || frompos < 0 || count <= 0) return strin;

    len = strlen(strin);
    from = (size_t) frompos;
    if(from >= len) return strin;

    cut = (size_t) count;
    if(cut > len - from) cut = len - from;

    /* source and destination overlap, so memmove rather than memcpy */
    memmove(strin + from, strin + from + cut, len - from - cut + 1);
    return strin;
}
44 
strimlead(char * str)45 char *strimlead(char *str)  { return trimlead(str, " \t");  }
strimtrail(char * str)46 char *strimtrail(char *str) { return trimtrail(str, " \t"); }
strim(char * str)47 char *strim(char *str)      { return trim(str, " \t");      }
48 
/*
 * Removes, in place, every leading character of str that occurs in chr.
 * The remainder is moved to the front in a single step (memmove is used
 * because source and destination overlap).
 *
 * Returns str.
 */
char *trimlead(char *str, char *chr) {
    const size_t skip = strspn(str, chr);

    if(skip) memmove(str, str + skip, strlen(str + skip) + 1);
    return str;
}
53 
/*
 * Removes, in place, every trailing character of str that occurs in chr
 * by overwriting it with a terminator. The length is measured once
 * instead of on every iteration.
 *
 * Returns str.
 */
char *trimtrail(char *str, char *chr) {
    size_t len = strlen(str);

    while(len && strchr(chr, str[len-1]))
        str[--len] = 0;

    return str;
}
59 
trim(char * str,char * chr)60 char *trim(char *str, char *chr) {
61     return trimlead(trimtrail(str, chr), chr);
62 }
63 
/*
 * Formats the local time *t into sout according to mask.
 *
 * In mask a run of identical letters out of D, M, Y, h, m, s is replaced
 * by day of month, month (1-12), year, hour, minute or second, printed
 * as a zero-padded decimal whose minimum width is the length of the run.
 * A run of one to three 'Y' prints the year without its first two
 * characters (e.g. "00" for 2000). Every other character is copied as is.
 *
 * sout must be large enough for the result; it is set to the empty
 * string when localtime() fails. Returns sout.
 */
char *time2str(const time_t *t, char *mask, char *sout) {
    const struct tm *s;
    const char *m = mask;
    char *out = sout;

    sout[0] = 0;
    s = localtime(t);
    if(!s) return sout;

    while(*m) {
        const char ch = *m;
        int len = 0, value = 0;

        if(!strchr("DMYhms", ch)) {
            /* literal character */
            *out++ = *m++;
            *out = 0;
            continue;
        }

        while(m[len] == ch) len++;
        m += len;

        switch(ch) {
            case 'D': value = s->tm_mday; break;
            case 'M': value = s->tm_mon+1; break;
            case 'Y': value = s->tm_year+1900; break;
            case 'h': value = s->tm_hour; break;
            case 'm': value = s->tm_min; break;
            case 's': value = s->tm_sec; break;
        }

        if(ch == 'Y' && len <= 3) {
            /* short year: print it in full, then drop the first two characters */
            char year[16];
            const int n = sprintf(year, "%0*d", len, value);

            strcpy(out, year + (n < 2 ? n : 2));
            out += strlen(out);
        } else {
            /* written straight into sout, so the run length is not limited
               by a fixed-size scratch buffer */
            out += sprintf(out, "%0*d", len, value);
        }
    }

    return sout;
}
102 
/*
 * Parses sdate according to mask and converts the result with mktime().
 *
 * A run of identical letters out of D, M, Y, h, m, s in mask selects the
 * characters at the same positions in sdate; they are converted with
 * atoi() and stored as day, month, year, hour, minute or second. Other
 * mask characters are skipped. Fields missing from mask are zero, and
 * positions beyond the end of sdate are not read.
 *
 * The result is stored in *t (when t is not NULL) and returned.
 */
time_t str2time(char *sdate, char *mask, time_t *t) {
    struct tm s;
    const size_t masklen = strlen(mask), datelen = strlen(sdate);
    size_t i = 0;
    time_t result;

    memset(&s, 0, sizeof s);

    while(i < masklen) {
        const char ch = mask[i];
        char digits[10];
        size_t len = 0, n = 0, j;
        int value;

        if(!strchr("DMYhms", ch)) {
            i++;
            continue;
        }

        while(mask[i+len] == ch) len++;

        /* copy the field, bounded by both the scratch buffer and sdate */
        for(j = i; j < i+len && j < datelen && n < sizeof digits - 1; j++)
            digits[n++] = sdate[j];

        digits[n] = 0;
        value = atoi(digits);
        i += len;

        switch(ch) {
            case 'D': s.tm_mday = value; break;
            case 'M': s.tm_mon = value-1; break;
            case 'Y': s.tm_year = value-1900; break;
            /* NOTE(review): the hour is stored minus one although time2str()
               prints tm_hour unchanged; kept as is - confirm the intent */
            case 'h': s.tm_hour = value-1; break;
            case 'm': s.tm_min = value; break;
            case 's': s.tm_sec = value; break;
        }
    }

    s.tm_isdst = -1;
    result = mktime(&s);
    if(t) *t = result;
    return result;
}
141 
/*
 * std::string front end for unmime(char *): decodes a writable copy of
 * text and returns the decoded result.
 */
string unmime(const string &text) {
    char *scratch = new char[text.size()+1];

    strcpy(scratch, text.c_str());
    const string decoded = unmime(scratch);

    delete[] scratch;
    return decoded;
}
150 
mime(const string & text)151 string mime(const string &text) {
152     string r;
153     char *buf = new char[text.size()*3+1];
154     r = mime(buf, text.c_str());
155     delete[] buf;
156     return r;
157 }
158 
fromutf8(const string & text)159 string fromutf8(const string &text) {
160     string r;
161     char *buf = (char *) utf8_to_str(text.c_str());
162     r = buf;
163     delete buf;
164     return r;
165 }
166 
toutf8(const string & text)167 string toutf8(const string &text) {
168     string r;
169     char *buf = str_to_utf8(text.c_str());
170     r = buf;
171     delete buf;
172     return r;
173 }
174 
/*
 * Decodes text in place: '+' becomes a space and "%XX" (two hexadecimal
 * digits) becomes the byte with that value. A '%' that is not followed
 * by two hexadecimal digits is copied unchanged, so malformed or
 * truncated escapes never read past the end of the string.
 *
 * Returns text.
 */
char *unmime(char *text) {
    int s, d;

    for(s = 0, d = 0; text[s] != 0; s++) {
        if(text[s] == '+') {
            text[d++] = ' ';
        } else if(text[s] == '%'
               && isxdigit((unsigned char) text[s+1])
               && isxdigit((unsigned char) text[s+2])) {
            int hi = (unsigned char) text[s+1];
            int lo = (unsigned char) text[s+2];

            hi = isdigit(hi) ? hi - '0' : toupper(hi) - 'A' + 10;
            lo = isdigit(lo) ? lo - '0' : toupper(lo) - 'A' + 10;

            text[d++] = (char) (hi*16 + lo);
            s += 2;
        } else {
            text[d++] = text[s];
        }
    }

    text[d] = 0;
    return text;
}
192 
/*
 * Encodes src into dst: ASCII letters and digits are copied, a space
 * becomes '+', and every other byte becomes '%' followed by two
 * upper-case hexadecimal digits.
 *
 * dst must have room for three characters per input byte plus the
 * terminator. Returns dst.
 */
char *mime(char *dst, const char *src) {
    static const char hexdigits[] = "0123456789ABCDEF";
    const char *in;
    char *out = dst;

    for(in = src; *in; in++) {
        const char ch = *in;
        const int plain = (ch >= 'a' && ch <= 'z')
                       || (ch >= 'A' && ch <= 'Z')
                       || (ch >= '0' && ch <= '9');

        if(plain) {
            *out++ = ch;
        } else if(ch == ' ') {
            *out++ = '+';
        } else {
            *out++ = '%';
            *out++ = hexdigits[(ch >> 4) & 0x0F];
            *out++ = hexdigits[ch & 0x0F];
        }
    }

    *out = '\0';
    return dst;
}
215 
/*
 * Appends the single character c to the string dest, which must have
 * room for it. Returns dest.
 */
char *strccat(char *dest, char c) {
    const int len = strlen(dest);
    char *end = dest + len;

    end[0] = c;
    end[1] = 0;
    return dest;
}
222 
/*
 * Returns the positions of the quote characters in haystack that open or
 * close a quoted section.
 *
 * qs lists the quote characters and aescs the escape characters (escapes
 * are ignored when aescs equals qs). A quote directly preceded by an
 * escape character is skipped; inside quotes, an escape directly
 * preceded by another escape neutralises it. A section is closed only by
 * the same quote character that opened it.
 *
 * The state describing the previous match starts out as "none", so the
 * first quote found is never mistaken for an escaped one.
 */
vector<int> getquotelayout(const string &haystack, const string &qs, const string &aescs) {
    vector<int> r;
    const string escs = (qs == aescs) ? string() : aescs;
    const string needle = qs + escs;
    string::size_type pos = 0;
    int prevpos = -2;
    char qchar = 0, prevchar = 0;

    while((pos = haystack.find_first_of(needle, pos)) != string::npos) {
        const int curpos = (int) pos;
        char cchar = haystack[pos];

        /* was the previous match an escape character right before this one? */
        const bool afteresc = (prevpos == curpos-1)
            && (escs.find(prevchar) != string::npos);

        if(escs.find(cchar) != string::npos) {
            /* Neutralize previous esc char */
            if(qchar && afteresc) cchar = 0;

        } else if(qs.find(cchar) != string::npos) {
            if(!afteresc && (!qchar || (qchar == cchar))) {
                qchar = qchar ? 0 : cchar;
                r.push_back(curpos);
            }
        }

        prevpos = curpos;
        prevchar = cchar;
        pos++;
    }

    return r;
}
262 
getsymbolpositions(const string & haystack,const string & needles,const string & qoutes,const string & esc)263 vector<int> getsymbolpositions(const string &haystack, const string &needles, const string &qoutes, const string &esc) {
264     vector<int> r, qp, nr;
265     vector<int>::iterator iq, ir;
266     int pos, st, ed, cpos;
267 
268     for(cpos = 0; (pos = haystack.substr(cpos).find_first_of(needles)) != -1; ) {
269 	r.push_back(cpos+pos);
270 	cpos += pos+1;
271     }
272 
273     qp = getquotelayout(haystack, qoutes, esc);
274     for(iq = qp.begin(); iq != qp.end(); iq++) {
275 	if(!((iq-qp.begin()) % 2)) {
276 	    st = *iq;
277 	    ed = iq+1 != qp.end() ? *(iq+1) : haystack.size();
278 	    nr.clear();
279 
280 	    for(ir = r.begin(); ir != r.end(); ir++) {
281 		if(!(*ir > st && *ir < ed)) {
282 		    nr.push_back(*ir);
283 		}
284 	    }
285 
286 	    r = nr;
287 	}
288     }
289 
290     return r;
291 }
292 
/*
 * CHECKESC(curpos, startpos, esc)
 *
 * Statement macro for use directly inside a loop body. When curpos lies
 * more than one character past startpos, the character before curpos
 * occurs in esc and the character two before it does not, the macro
 * increments curpos and executes `continue'.
 *
 * Because of the first condition, a character at offset 0 or 1 from
 * startpos is never treated as escaped.
 *
 * The expansion is a chain of unbraced ifs and the arguments are not
 * parenthesised. This file also invokes it with the expression `s+i'
 * as curpos, where `curpos++' expands to `s+i++' and increments i.
 */
#define CHECKESC(curpos, startpos, esc) \
    if(curpos > startpos+1) \
    if(strchr(esc, *(curpos-1))) \
    if(!strchr(esc, *(curpos-2))) { \
	curpos++; \
	continue; \
    }
300 
strqpbrk(const char * s,int offset,const char * accept,const char * q,const char * esc)301 const char *strqpbrk(const char *s, int offset, const char *accept, const char *q, const char *esc) {
302     if(!s) return 0;
303     if(!s[0]) return 0;
304 
305     char qchar = 0;
306     const char *ret = 0, *p = s;
307     char *cset = (char *) malloc(strlen(accept)+strlen(q)+1);
308 
309     strcpy(cset, accept);
310     strcat(cset, q);
311 
312     while(p = strpbrk(p, cset)) {
313 	if(strchr(q, *p)) {
314 	    if(strcmp(esc, q))
315 		CHECKESC(p, s, esc);
316 
317 	    if(!qchar) {
318 		qchar = *p;
319 	    } else {
320 		if(*p == qchar) qchar = 0;
321 	    }
322 	} else if((p-s >= offset) && !qchar) {
323 	    ret = p;
324 	    break;
325 	}
326 	p++;
327     }
328 
329     free(cset);
330     return ret;
331 }
332 
/*
 * Quote-aware, case-insensitive strstr(): returns a pointer to the first
 * occurrence of str in s that does not start inside quotes, or 0.
 *
 * Every character of s found in q toggles the quoted state. Unless esc
 * equals q, a quote character at index 2 or later that is preceded by an
 * escape character (itself not preceded by another escape) is ignored.
 * This is the same condition CHECKESC tests.
 *
 * The escape test is written out here because the macro's increment,
 * added to this loop's own increment, skipped the character following
 * an escaped quote.
 */
const char *strqcasestr(const char *s, const char *str, const char *q, const char *esc) {
    const size_t slen = strlen(s), nlen = strlen(str);
    const int checkesc = strcmp(esc, q) != 0;
    int quote = 0;
    size_t i;

    for(i = 0; i < slen; i++) {
        if(strchr(q, s[i])) {
            if(checkesc && i > 1 && strchr(esc, s[i-1]) && !strchr(esc, s[i-2]))
                continue;

            quote = !quote;
        }

        if(!quote && !strncasecmp(s + i, str, nlen)) return s + i;
    }

    return 0;
}
350 
strqstr(const char * s,const char * str,const char * q,const char * esc)351 const char *strqstr(const char *s, const char *str, const char *q, const char *esc) {
352     char quote;
353     const char *ret = 0, *p, *ss, *r;
354     p = ss = s;
355 
356     while(p = strstr(ss, str)) {
357 	quote = 0;
358 	r = s;
359 
360 	while(r = strpbrk(r, q)) {
361 	    if(r > p) break;
362 	    if(strcmp(esc, q))
363 		CHECKESC(r, s, esc);
364 	    quote = !quote;
365 	    r++;
366 	}
367 
368 	if(!quote) {
369 	    ret = p;
370 	    break;
371 	} else {
372 	    ss = p+strlen(str);
373 	}
374     }
375 
376     return ret;
377 }
378 
/*
 * Inserts the string ins into buf at index pos. The tail of buf is
 * shifted right with memmove, so no temporary heap copy is needed and
 * no allocation can fail.
 *
 * buf must have room for the enlarged string. Returns buf.
 */
char *strinsert(char *buf, int pos, char *ins) {
    const size_t inslen = strlen(ins);
    char *at = buf + pos;

    memmove(at + inslen, at, strlen(at) + 1);
    memcpy(at, ins, inslen);
    return buf;
}
386 
/*
 * Inserts the character ins into buf at index pos. The tail of buf is
 * shifted right with memmove, so no temporary heap copy is needed and
 * no allocation can fail.
 *
 * buf must have room for one more character. Returns buf.
 */
char *strcinsert(char *buf, int pos, char ins) {
    char *at = buf + pos;

    memmove(at + 1, at, strlen(at) + 1);
    *at = ins;
    return buf;
}
394 
/*
 * Returns the number of characters in s that occur in accept.
 * The scan starts at s itself and never forms a pointer before the
 * start of the string.
 */
int strchcount(char *s, char *accept) {
    const char *p;
    int hits = 0;

    for(p = strpbrk(s, accept); p; p = strpbrk(p + 1, accept))
        hits++;

    return hits;
}
401 
/*
 * Tells whether the word of wordlen characters starting at startword
 * (a pointer into buf) stands alone: it must begin at the start of buf
 * or right after a character from delim, and be followed by the end of
 * the string or a character from delim.
 *
 * Returns non-zero when both conditions hold.
 */
int stralone(char *buf, char *startword, int wordlen, char *delim) {
    const char *after = startword + wordlen;
    int leftok, rightok;

    if(buf == startword) leftok = 1;
    else leftok = strchr(delim, *(startword-1)) != 0;

    if(*after == 0) rightok = 1;
    else rightok = strchr(delim, *after) != 0;

    return leftok && rightok;
}
408 
/*
 * Returns the part of fname after its last '/', or the whole of fname
 * when it contains no '/'.
 */
string justfname(const string &fname) {
    const string::size_type slash = fname.rfind("/");

    if(slash == string::npos) return fname;
    return fname.substr(slash+1);
}
412 
/*
 * Returns the part of fname before its last '/', or an empty string
 * when it contains no '/'.
 */
string justpathname(const string &fname) {
    const int slash = fname.rfind("/");

    return (slash != -1) ? fname.substr(0, slash) : string("");
}
422 
/*
 * Releases a block obtained from malloc(); a NULL pointer is accepted
 * (free() ignores it).
 */
void charpointerfree(void *p) {
    free(p);
}
428 
/* Does nothing with p. */
void nothingfree(void *p) {
    (void) p;
}
431 
/*
 * Compares two C strings passed as untyped pointers. When both are
 * non-NULL the result is that of strcmp(); otherwise it is 0 if both
 * are NULL and 1 if only one of them is.
 */
int stringcompare(void *s1, void *s2) {
    if(s1 && s2)
        return strcmp((char *) s1, (char *) s2);

    return s1 != s2;
}
439 
/*
 * Compares two integers stored in pointer-sized values: returns 0 when
 * they are equal and 1 otherwise (no ordering is reported).
 */
int intcompare(void *s1, void *s2) {
    const intptr_t left = (intptr_t) s1;
    const intptr_t right = (intptr_t) s2;

    return (left == right) ? 0 : 1;
}
443 
/* Returns the decimal representation of i. */
string i2str(int i) {
    char digits[64];

    sprintf(digits, "%d", i);
    return string(digits);
}
449 
/*
 * Returns the decimal representation of i read as an unsigned value.
 * The format is "%u"; the former "%du" printed a signed number followed
 * by a literal 'u'.
 */
string ui2str(int i) {
    char buf[64];

    sprintf(buf, "%u", (unsigned int) i);
    return (string) buf;
}
455 
/*
 * Returns a copy of text in which every character that is not
 * alphanumeric (per isalnum()) is preceded by a backslash.
 *
 * The result is built by appending, and characters are passed to
 * isalnum() as unsigned char so that bytes above 0x7F are valid
 * arguments.
 */
string textscreen(const string &text) {
    string r;

    r.reserve(text.size()*2);

    for(string::size_type i = 0; i < text.size(); i++) {
        if(!isalnum((unsigned char) text[i])) r += '\\';
        r += text[i];
    }

    return r;
}
465 
/*
 * Returns base without its leading characters that occur in delim;
 * the result is empty when base consists of such characters only.
 */
string leadcut(const string &base, const string &delim) {
    const int first = base.find_first_not_of(delim);

    if(first == -1) return "";
    return base.substr(first);
}
470 
/*
 * Returns base without its trailing characters that occur in delim;
 * the result is empty when base consists of such characters only.
 */
string trailcut(const string &base, const string &delim) {
    const int last = base.find_last_not_of(delim);

    if(last == -1) return "";
    return base.substr(0, last+1);
}
475 
getword(string & base,const string & delim)476 string getword(string &base, const string &delim) {
477     string sub;
478     int i;
479     bool found = false;
480 
481     base = leadcut(base, delim);
482 
483     for(i = 0, sub = base; i < sub.size(); i++)
484     if(strchr(delim.c_str(), sub[i])) {
485 	sub.resize(i);
486 	base.replace(0, i, "");
487 	base = leadcut(base, delim);
488 	found = true;
489 	break;
490     }
491 
492     if(!found) base = "";
493     return sub;
494 }
495 
getwordquote(string & base,string quote,string delim)496 const string getwordquote(string &base, string quote, string delim) {
497     string sub;
498     bool inquote = false;
499     int i;
500 
501     base = leadcut(base, delim);
502 
503     for(i = 0, sub = base; i < sub.size(); i++) {
504 	if(strchr(quote.c_str(), sub[i])) {
505 	    inquote = !inquote;
506 	} else if(!inquote && strchr(delim.c_str(), sub[i])) {
507 	    sub.resize(i);
508 	    base.replace(0, i, "");
509 	    base = leadcut(base, delim);
510 	    break;
511 	}
512     }
513 
514     if(sub == base) base = "";
515     return sub;
516 }
517 
getrword(string & base,const string & delim)518 string getrword(string &base, const string &delim) {
519     string sub;
520     int i;
521 
522     base = trailcut(base, delim);
523 
524     for(i = base.size()-1, sub = base; i >= 0; i--)
525     if(strchr(delim.c_str(), base[i])) {
526 	sub = base.substr(i+1);
527 	base.resize(i);
528 	base = trailcut(base, delim);
529 	break;
530     }
531 
532     if(sub == base) base = "";
533     return sub;
534 }
535 
getrwordquote(string & base,const string & quote,const string & delim)536 string getrwordquote(string &base, const string &quote, const string &delim) {
537     string sub;
538     bool inquote = false;
539     int i;
540 
541     base = trailcut(base, delim);
542 
543     for(i = base.size()-1, sub = base; i >= 0; i--)
544     if(strchr(quote.c_str(), base[i])) {
545 	inquote = !inquote;
546     } else if(!inquote && strchr(delim.c_str(), base[i])) {
547 	sub = base.substr(i+1);
548 	base.resize(i);
549 	base = trailcut(base, delim);
550 	break;
551     }
552 
553     if(sub == base) base = "";
554     return sub;
555 }
556 
rtabmargin(bool fake,int curpos,const char * p)557 int rtabmargin(bool fake, int curpos, const char *p) {
558     int ret = -1, n, near;
559 
560     if(p && (curpos != strlen(p))) {
561 	n = strspn(p+curpos, " ");
562 
563 	if(fake) {
564 	    near = ((curpos/(TAB_SIZE/2))+1)*(TAB_SIZE/2);
565 	    if(n >= near-curpos) ret = near;
566 	}
567 
568 	near = ((curpos/TAB_SIZE)+1)*TAB_SIZE;
569 	if(n >= near-curpos) ret = near;
570     } else {
571 	if(p && fake) fake = (strspn(p, " ") == strlen(p));
572 	if(fake) ret = ((curpos/(TAB_SIZE/2))+1)*(TAB_SIZE/2);
573 	else ret = ((curpos/TAB_SIZE)+1)*TAB_SIZE;
574     }
575 
576     return ret;
577 }
578 
/*
 * Computes the column to move to when going one tab stop to the left
 * of curpos.
 *
 * Without text (p is NULL) the result is curpos rounded down to a
 * multiple of TAB_SIZE, or of TAB_SIZE/2 when fake is set.
 *
 * With text, n is derived from the run of spaces that ends just before
 * curpos and the result depends on whether the previous TAB_SIZE stop
 * lies at or before curpos-n.
 */
int ltabmargin(bool fake, int curpos, const char *p) {
    int ret = -1, near, n = 0;
    const char *cp;

    if(p) {
	cp = p+curpos;

	if(curpos) {
	    /* The character just before curpos, when it is a space, is
	       counted here and once more by the first loop iteration
	       (unless it is the first character of p). The loop stops at
	       the first non-space or on reaching p itself. */
	    if(*(--cp) == ' ') n++;
	    for(; (*cp == ' ') && (cp != p); cp--) n++;
	}

	if(fake) {
	    near = (curpos/(TAB_SIZE/2))*(TAB_SIZE/2);
	    if(near <= curpos-n)
	    if((ret = curpos-n) != 0) ret++;
	}

	/* NOTE(review): both branches below assign ret, so whatever the
	   `fake' block above stored is always overwritten - confirm that
	   this is intended. */
	near = (curpos/TAB_SIZE)*TAB_SIZE;
	if(near <= curpos-n) {
	    /* curpos-n, plus one unless that is column 0 */
	    if((ret = curpos-n) != 0) ret++;
	} else ret = near;

    } else {
	if(fake) ret = (curpos/(TAB_SIZE/2))*(TAB_SIZE/2);
	else ret = (curpos/TAB_SIZE)*TAB_SIZE;
    }

    return ret;
}
609 
breakintolines(string text,vector<string> & lst,int linelen)610 void breakintolines(string text, vector<string> &lst, int linelen) {
611     int dpos, nlen;
612     string sub;
613     vector<string>::iterator i;
614 
615     breakintolines(text, lst);
616 
617     if(linelen > 0) {
618 	for(i = lst.begin(); i != lst.end(); i++) {
619 	    if(i->size() > linelen) {
620 		sub = i->substr(0, nlen = linelen);
621 
622 		if((dpos = sub.rfind(" ")) != -1) {
623 		    if(dpos) nlen = dpos; else nlen = 1;
624 		}
625 
626 		if(dpos != -1)
627 		    nlen++;
628 
629 		sub = i->substr(nlen);
630 		i->erase(nlen);
631 		lst.insert(i+1, sub);
632 		i = lst.begin();
633 	    }
634 	}
635     }
636 }
637 
breakintolines(const string & text,vector<string> & lst)638 void breakintolines(const string &text, vector<string> &lst) {
639     int npos, dpos, tpos;
640     string sub;
641 
642     tpos = 0;
643     lst.clear();
644 
645     while(tpos < text.size()) {
646 	if((npos = text.find("\n", tpos)) != -1) {
647 	    sub = text.substr(tpos, npos-tpos);
648 	} else {
649 	    sub = text.substr(tpos);
650 	    npos = text.size();
651 	}
652 
653 	tpos += npos-tpos+1;
654 
655 	for(dpos = 0; (dpos = sub.find("\r", dpos)) != -1; ) {
656 	    sub.erase(dpos, 1);
657 	}
658 
659 	for(dpos = 0; (dpos = sub.find("\t", dpos)) != -1; ) {
660 	    sub.erase(dpos, 1);
661 	    sub.insert(dpos, string(rtabmargin(false, dpos)-dpos, ' '));
662 	}
663 
664 	lst.push_back(sub);
665     }
666 }
667 
find_gather_quoted(vector<quotedblock> & lst,const string & str,const string & quote,const string & escape)668 void find_gather_quoted(vector<quotedblock> &lst, const string &str,
669 const string &quote, const string &escape) {
670     bool inquote = false;
671     int npos = 0, qch;
672     quotedblock qb;
673 
674     while((npos = str.find_first_of(quote, npos)) != -1) {
675 	if(npos)
676 	if(escape.find(str[npos-1]) == -1) {
677 	    inquote = !inquote;
678 
679 	    if(inquote) {
680 		qb.begin = npos;
681 		qch = str[npos];
682 	    } else {
683 		if(str[npos] == qch) {
684 		    qb.end = npos;
685 		    lst.push_back(qb);
686 		} else {
687 		    inquote = true;
688 		}
689 	    }
690 	}
691 	npos++;
692     }
693 }
694 
find_quoted(const string & str,const string & needle,int offs,const string & quote,const string & escape)695 int find_quoted(const string &str, const string &needle, int offs,
696 const string &quote, const string &escape) {
697     vector<quotedblock> positions;
698     vector<quotedblock>::iterator qi;
699     int npos = offs;
700     bool found;
701 
702     find_gather_quoted(positions, str, quote, escape);
703 
704     while((npos = str.find(needle, npos)) != -1) {
705 	for(found = false, qi = positions.begin(); qi != positions.end() && !found; qi++)
706 	    if((npos > qi->begin) && (npos < qi->end)) found = true;
707 
708 	if(!found) break;
709 	npos++;
710     }
711 
712     return !found ? npos : -1;
713 }
714 
find_quoted_first_of(const string & str,const string & needle,int offs,const string & quote,const string & escape)715 int find_quoted_first_of(const string &str, const string &needle, int offs,
716 const string &quote, const string &escape) {
717     vector<quotedblock> positions;
718     vector<quotedblock>::iterator qi;
719     int npos = offs;
720     bool found;
721 
722     find_gather_quoted(positions, str, quote, escape);
723 
724     while((npos = str.find_first_of(needle, npos)) != -1) {
725 	for(found = false, qi = positions.begin(); qi != positions.end() && !found; qi++)
726 	    if((npos > qi->begin) && (npos < qi->end)) found = true;
727 
728 	if(!found) break;
729 	npos++;
730     }
731 
732     return !found ? npos : -1;
733 }
734 
/*
 * Splits text into pieces of at most size characters and stores them in
 * lst (cleared first). Every piece except the last ends with the
 * continuation mark cont.
 *
 * A piece is cut at the last space or tab among its first
 * size-cont.size() characters, or after that many characters when there
 * is none; blanks at the start of the remaining text are dropped. A
 * remainder no longer than cont is appended to the current piece instead
 * of getting a piece of its own.
 *
 * When size leaves no room beside cont (size <= cont.size()), text is
 * stored unsplit; with size equal to cont.size() the loop used to make
 * no progress.
 */
void splitlongtext(string text, vector<string> &lst, int size, const string cont) {
    lst.clear();

    if(size <= (int) cont.size()) {
        if(!text.empty()) lst.push_back(text);
        return;
    }

    const string::size_type room = size-cont.size();

    while(!text.empty()) {
        string::size_type cut;

        if(text.size() <= room) {
            cut = text.size();
        } else {
            cut = text.find_last_of(" \t", room-1);
            if(cut == string::npos) cut = room;
        }

        string sub = text.substr(0, cut);
        text.erase(0, cut);

        if(text.size() > cont.size()) {
            sub += cont;
        } else {
            sub += text;
            text = "";
        }

        const string::size_type skip = text.find_first_not_of(" \t");
        if(skip != string::npos) text.erase(0, skip);

        lst.push_back(sub);
    }
}
762 
strdateandtime(time_t stamp,const string & fmt)763 string strdateandtime(time_t stamp, const string &fmt) {
764     return strdateandtime(localtime(&stamp), fmt);
765 }
766 
/*
 * Formats *tms with strftime() using fmt, or "%b %e %Y %H:%M" when fmt
 * is empty.
 *
 * Returns an empty string when tms is NULL or when strftime() reports
 * that nothing was stored (for instance because the result does not fit
 * into the 512-byte buffer, whose contents are then unspecified).
 */
string strdateandtime(struct tm *tms, const string &fmt) {
    char buf[512];
    const string afmt = fmt.empty() ? string("%b %e %Y %H:%M") : fmt;

    if(!tms) return "";

    if(!strftime(buf, sizeof(buf), afmt.c_str(), tms)) return "";

    return buf;
}
788 
/*
 * Tells whether the range of s delimited by the offsets so and eo is
 * bounded by word delimiters.
 *
 * The left side qualifies when so is 0 or the character before so is a
 * delimiter; the right side qualifies when eo is the index of the last
 * character of s or the character at eo is a delimiter (an empty
 * substring at eo == s.size() also qualifies).
 */
bool iswholeword(const string &s, int so, int eo) {
    const string wdelims = "[](),.; <>-+{}=|&%~*/:?@";

    bool leftok = true;
    if(so != 0)
        leftok = wdelims.find(s.substr(so-1, 1)) != -1;

    bool rightok = true;
    if(eo != s.size()-1)
        rightok = wdelims.find(s.substr(eo, 1)) != -1;

    return rightok && leftok;
}
798 
/*
 * Converts a string of at most two hexadecimal digits (either case) to
 * its value; longer strings yield 0. The input is not validated.
 *
 * Digits are accumulated as r*16 + digit, so a single digit yields its
 * own value (it used to be multiplied by 16); two-digit results are
 * unchanged.
 */
int hex2int(const string &ahex) {
    int r = 0;

    if(ahex.size() > 2) return 0;

    for(string::size_type i = 0; i < ahex.size(); i++) {
        const unsigned char ch = ahex[i];
        const int digit = isdigit(ch) ? ch - '0' : toupper(ch) - 'A' + 10;

        r = r*16 + digit;
    }

    return r;
}
813 
getconf(string & st,string & buf,ifstream & f,bool passemptylines)814 bool getconf(string &st, string &buf, ifstream &f, bool passemptylines) {
815     bool ret = false;
816     static string sect;
817 
818     while(!f.eof() && !ret) {
819 	getstring(f, buf);
820 
821 	if(buf.size()) {
822 	    switch(buf[0]) {
823 		case '%':
824 		    sect = buf.substr(1);
825 		    break;
826 		case '#':
827 		    if(buf[1] != '!') break;
828 		default:
829 		    ret = buf.size();
830 		    break;
831 	    }
832 	} else if(passemptylines) {
833 	    ret = 1;
834 	}
835     }
836 
837     st = sect;
838     return ret;
839 }
840 
/*
 * Reads one line of any length from f into sbuf.
 *
 * The line is fetched in pieces of up to 2047 characters; while
 * getline() leaves the stream in a failed state without reaching the end
 * of input, the state is cleared and reading continues.
 *
 * Returns false, leaving sbuf untouched, when f is already at end of
 * file; true otherwise.
 */
bool getstring(istream &f, string &sbuf) {
    static char chunk[2048];

    if(f.eof()) return false;

    sbuf = "";

    do {
        f.clear();
        f.getline(chunk, sizeof(chunk));
        sbuf += chunk;
    } while(!f.good() && !f.eof());

    return true;
}
857 
/*
 * Converts the case of s. mode is "tolower" or "toupper"; with any
 * other mode s is returned unchanged.
 *
 * Characters listed in the table for the source case are replaced by the
 * character at the same index of the other table; every remaining
 * character is then passed through tolower() or toupper().
 */
string ruscase(const string &s, const string &mode) {
    /* NOTE(review): presumably single-byte (KOI8-R?) Cyrillic letters, in
       which case the bytes must not be re-encoded - confirm the source
       encoding before editing these literals */
    static const string lower = "ÁÂ×ÇÄÅÖÚÉÊËÌÍÎÏÐÒÓÔÕÆÈÃÞÛÝØßÙÜÀÑ";
    static const string upper = "áâ÷çäåöúéêëìíîïðòóôõæèãþûýøÿùüàñ";
    string r, tfrom, tto;
    int pos, tpos;

    if(mode == "tolower") {
	tfrom = upper;
	tto = lower;
    } else if(mode == "toupper") {
	tfrom = lower;
	tto = upper;
    } else {
	return s;
    }

    pos = 0;

    /* First pass: table-driven replacement of the characters in tfrom */
    for(r = s; (pos = r.find_first_of(tfrom, pos)) != -1; ) {
	char c = r[pos];
	tpos = tfrom.find(c);
	r[pos] = tto[tpos];
	pos++;
    }

    pos = 0;

    /* Second pass: every character not in tfrom goes through the C
       library conversion; this includes the characters produced by the
       first pass */
    while((pos = r.find_first_not_of(tfrom, pos)) != -1) {
	if(mode == "tolower") r[pos] = tolower(r[pos]); else
	if(mode == "toupper") r[pos] = toupper(r[pos]);
	pos++;
    }

    return r;
}
893 
/*
 * Converts atext from character set fromcs to tocs with iconv().
 *
 * Returns atext unchanged when HAVE_ICONV is not defined, when the
 * conversion descriptor cannot be opened or when the input copy cannot
 * be allocated. An input byte rejected with EILSEQ is skipped and the
 * conversion resumes after it; any other iconv() error ends the
 * conversion and what has been converted so far is returned.
 *
 * Converted bytes are appended by length, so nothing is written past the
 * output buffer when it is filled completely and output containing zero
 * bytes is not truncated. errno is sampled right after iconv().
 */
string siconv(const string &atext, const string &fromcs, const string &tocs) {
#ifdef HAVE_ICONV
    iconv_t cd = iconv_open(tocs.c_str(), fromcs.c_str());

    if(cd != ((iconv_t) -1)) {
        string r;
        size_t inleft = atext.size();
        char *sinbuf = (char *) malloc(inleft+1);

        if(!sinbuf) {
            iconv_close(cd);
            return atext;
        }

        memcpy(sinbuf, atext.c_str(), inleft+1);
        char *inbuf = sinbuf;

        /* reset the conversion state (as done by iconv.c of libiconv) */
        iconv(cd, NULL, NULL, NULL, NULL);

        while(inleft > 0) {
            const size_t outsize = inleft*4;
            size_t outleft = outsize;
            char *soutbuf = new char[outsize];
            char *outbuf = soutbuf;

            const size_t res = iconv(cd, (ICONV_CONST char **) &inbuf, &inleft,
                &outbuf, &outleft);
            const int err = errno;

            r.append(soutbuf, outsize-outleft);
            delete[] soutbuf;

            if((res == (size_t)(-1)) && (err != EILSEQ)) {
                break;
            }

            /* invalid sequence: step over the offending byte */
            if(inleft > 0) {
                inbuf++;
                inleft--;
            }
        }

        free(sinbuf);
        iconv_close(cd);
        return r;
    }
#endif

    return atext;
}
942 
cuthtml(const string & html,int flags)943 string cuthtml(const string &html, int flags) {
944     string r, tag, buf, token;
945     int npos, pos, tpos;
946 
947     for(pos = 0; (npos = html.find("<", pos)) != -1; pos = npos) {
948 	tpos = npos;
949 	r += html.substr(pos, npos-pos);
950 
951 	if((npos = html.find(">", ++npos)) != -1) {
952 	    npos++;
953 
954 	    tag = html.substr(tpos+1, npos-tpos-2);
955 	    if(tag.substr(0, 1) == "/") tag.erase(0, 1);
956 	    tag = leadcut(trailcut(tag, "/ \n\r"), "/ \n\r");
957 
958 	    buf = ruscase(tag, "toupper");
959 	    token = getword(buf);
960 
961 	    if(token == "BR") r += (flags & chCutBR) ? "\n" : "<br>";
962 		else if((flags & chCutBR) && token == "P") r += "\n\n";
963 
964 	    if(flags & chLeaveLinks) {
965 		getword(tag);
966 
967 		if(token == "A") {
968 		    if((tpos = buf.find("HREF")) != -1)
969 		    if((tpos = buf.substr(tpos).find("\"")) != -1) {
970 			tag.erase(0, tpos+1);
971 			r += "[ href: " + getword(tag, "\"") + " ] ";
972 		    }
973 
974 		} else if(token == "IMG") {
975 		    if((tpos = buf.find("SRC")) != -1)
976 		    if((tpos = buf.substr(tpos).find("\"")) != -1) {
977 			tag.erase(0, tpos+1);
978 			r += " [ img: " + getword(tag, "\"") + " ]";
979 		    }
980 
981 		}
982 	    }
983 
984 	} else {
985 	    r += html.substr(tpos);
986 	    npos = html.size();
987 	}
988     }
989 
990     if(pos < html.size())
991 	r += html.substr(pos);
992 
993     return r;
994 }
995 
/*
 * Converts the UTF-8 string pin to a string with one byte per character.
 *
 * ASCII bytes are copied. A well-formed multi-byte sequence encoding a
 * code point in the range 0x80-0xFF yields the byte with that value;
 * any other sequence (a larger code point, a truncated or an overlong
 * sequence) yields '?'. Bytes that cannot start a sequence are dropped.
 *
 * Lead bytes are classified by their exact bit pattern; the former
 * 0xC0 mask test matched every lead byte, so the branches for longer
 * sequences were never reached.
 *
 * Returns NULL when pin is NULL, otherwise an array allocated with
 * new[] that the caller must release with delete[].
 */
char *utf8_to_str(const char *pin) {
    if(!pin) return NULL;

    const unsigned char *in = (const unsigned char *) pin;
    const size_t inlen = strlen(pin);
    unsigned char *result = new unsigned char[inlen + 1];
    size_t n = 0, i = 0;

    while(n < inlen) {
        const unsigned char lead = in[n++];
        unsigned long cp;
        int extra, got = 0;

        if(lead < 0x80) {
            result[i++] = lead;
            continue;
        }

        if((lead & 0xE0) == 0xC0)      { extra = 1; cp = lead & 0x1F; }
        else if((lead & 0xF0) == 0xE0) { extra = 2; cp = lead & 0x0F; }
        else if((lead & 0xF8) == 0xF0) { extra = 3; cp = lead & 0x07; }
        else if((lead & 0xFC) == 0xF8) { extra = 4; cp = lead & 0x03; }
        else if((lead & 0xFE) == 0xFC) { extra = 5; cp = lead & 0x01; }
        else continue; /* stray continuation byte, 0xFE or 0xFF */

        /* consume the continuation bytes that are actually present */
        while(got < extra && n < inlen && (in[n] & 0xC0) == 0x80) {
            cp = (cp << 6) | (in[n++] & 0x3F);
            got++;
        }

        if(got == extra && cp >= 0x80 && cp <= 0xFF)
            result[i++] = (unsigned char) cp;
        else
            result[i++] = '?';
    }

    result[i] = '\0';
    return (char *) result;
}
1028 
/*
 * Converts the single-byte string pin to UTF-8: bytes below 128 are
 * copied and every other byte is written as the two-byte sequence for
 * the code point equal to its value.
 *
 * An ESC byte (27) starts a sequence that is dropped from the output:
 * ESC itself, the following byte, an optional 'x', an optional '3' and
 * two more bytes. The optional bytes are only examined while still
 * inside the string, so a sequence cut short by the end of the input
 * does not read past the terminator.
 *
 * Returns NULL when pin is NULL, otherwise an array allocated with
 * new[] that the caller must release with delete[].
 */
char *str_to_utf8(const char *pin) {
    if(!pin) return NULL;

    const unsigned char *in = (const unsigned char *) pin;
    const int inlen = strlen(pin);
    char *result = new char[inlen * 2 + 1];
    int n = 0, i = 0;

    while(n < inlen) {
        const long c = (long) in[n];

        if(c == 27) {
            n += 2;
            if(n < inlen && in[n] == 'x') n++;
            if(n < inlen && in[n] == '3') n++;
            n += 2;
            continue;
        }

        if(c < 128) {
            result[i++] = (char) c;
        } else {
            result[i++] = (char) ((c >> 6) | 192);
            result[i++] = (char) ((c & 63) | 128);
        }

        n++;
    }

    result[i] = '\0';
    return result;
}
1065 
striprtf(const string & s,const string & charset)1066 string striprtf(const string &s, const string &charset) {
1067     string r, spec, unichar, tmp;
1068     char pre = 0;
1069     bool bprint, bspec, bunicode;
1070     int bparen = -1;
1071 
1072     bprint = true;
1073     bspec = bunicode = false;
1074 
1075     for(string::const_iterator i = s.begin(); i != s.end(); ++i) {
1076 	if(!isalpha(*i) && !isdigit(*i)) bprint = true;
1077 
1078 	if(bspec) {
1079 	    spec += *i;
1080 
1081 	    if(spec.size() == 2) {
1082 		r += (char) hex2int(spec);
1083 		bspec = false;
1084 		bprint = true;
1085 	    }
1086 
1087 	} else switch(*i) {
1088 	    case '{':
1089 		if(pre != '\\') {
1090 		    bparen++;
1091 		    bprint = false;
1092 		} else {
1093 		    bprint = true;
1094 		    r += *i;
1095 		}
1096 		break;
1097 
1098 	    case '}':
1099 		if(pre != '\\') {
1100 		    bprint = false;
1101 		    bparen--;
1102 		} else {
1103 		    bprint = true;
1104 		    r += *i;
1105 		}
1106 
1107 		break;
1108 
1109 	    case '\\':
1110 		if(pre != '\\') {
1111 		    bprint = false;
1112 		} else {
1113 		    bprint = true;
1114 		    r += *i;
1115 		    pre = 0;
1116 		    continue;
1117 		}
1118 		break;
1119 
1120 	    case '\'':
1121 		if(!bparen && bprint && pre == '\\') {
1122 		    spec = "";
1123 		    bspec = true;
1124 		} else {
1125 		    r += *i;
1126 		}
1127 		break;
1128 
1129 	    case 'u':
1130 		if(!bparen) {
1131 		    if(pre == '\\' && isdigit(*(i+1))) {
1132 			unichar = "";
1133 			bunicode = true;
1134 		    } else if(bprint) {
1135 			r += *i;
1136 		    }
1137 		}
1138 		break;
1139 	    default:
1140 		if(!bparen) {
1141 		    if(bunicode) {
1142 			unichar += *i;
1143 
1144 			if(unichar.size() == 5) {
1145 			    bunicode = false;
1146 			    if(unichar.substr(0, 4).find_first_not_of("0123456789") == -1) {
1147 				long l = strtol(unichar.substr(0, 4).c_str(), 0, 0);
1148 				char ubuf[sizeof(long)+4];
1149 			    #ifdef HAVE_ICONV
1150 				memcpy(ubuf, "\xff\xfe", 2);
1151 				memcpy(ubuf+2, &l, sizeof(long));
1152 				memcpy(ubuf+sizeof(long)+2, "\x0a\x00", 2);
1153 			    #else
1154 				strcpy(ubuf, unichar.substr(4).c_str());
1155 			    #endif
1156 				r += siconv(ubuf , "utf-16", charset);
1157 			    }
1158 			}
1159 
1160 		    } else if(bprint) {
1161 			r += *i;
1162 		    }
1163 		}
1164 	}
1165 
1166 	pre = *i;
1167     }
1168 
1169     return leadcut(trailcut(r));
1170 }
1171