1 /*
2     misc.* - misc functions
3     Copyright (C) 1999-2004  Matthew Mueller <donut AT dakotacom.net>
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 #include "misc.h"
23 #include "strreps.h"
24 #include "log.h"
25 #include "file.h"
26 #include "path.h"
27 
28 #include <stdlib.h>
29 #include <unistd.h>
30 #include <string.h>
31 #include "myregex.h"
32 
33 #include "_sstream.h"
34 #include <iomanip>
35 
36 
// Lookup table: index 0-15 yields the matching lowercase hexadecimal digit.
const char hexchar[] = "0123456789abcdef";

// Returns the lowercase hexadecimal encoding of s.
// Every byte of s produces two digits (high nibble first), so the result is
// exactly twice as long as the input.
string hexstr(const string &s){
	string encoded;
	for (string::size_type idx=0; idx<s.size(); ++idx) {
		// work on the unsigned value so that >>4 never sees a sign-extended byte
		const unsigned char byte=s[idx];
		encoded += hexchar[byte>>4];
		encoded += hexchar[byte&15];
	}
	return encoded;
}
47 
parsestr_valcheck(const string & val,bool is_signed)48 void parsestr_valcheck(const string &val, bool is_signed) {
49 	if (val.empty())
50 		throw parse_error("empty val");
51 	if (!is_signed && val.find('-')!=string::npos)
52 		throw parse_error("invalid unsigned value");
53 }
parsestr_isscheck(istringstream & iss)54 void parsestr_isscheck(istringstream &iss) {
55 	if (iss.fail() || iss.bad())
56 		throw parse_error("invalid value");
57 	if (!iss.eof() && iss.peek()!=EOF)
58 		throw parse_error("trailing junk");
59 }
60 
strtolower(const string & s)61 string strtolower(const string &s){
62 	string sl = s;
63 	lowerstr(sl);
64 	return sl;
65 }
66 
// Lowercases s in place, one byte at a time, using tolower().
void lowerstr(string &s){
	for (string::iterator i=s.begin(); i!=s.end(); ++i) {
		// tolower() requires a value representable as unsigned char (or EOF);
		// passing a negative plain char (any byte >= 0x80 where char is signed)
		// is undefined behaviour, so convert explicitly first.
		*i=static_cast<char>(tolower(static_cast<unsigned char>(*i)));
	}
}
71 
// Returns true when s begins with t. An empty t is a prefix of every string;
// a t longer than s never matches.
bool strstartswith(const string &s, const string &t) {
	// compare() clamps the length to s.size(), just like substr() would
	return s.compare(0, t.size(), t) == 0;
}
75 
regex2wildmat(const string & repat,bool ignorecase)76 string regex2wildmat(const string &repat, bool ignorecase){
77 	if (repat.empty())
78 		return "*";
79 	string wildmat;
80 	unsigned int pos=0;
81 	if (repat[0]=='^')
82 		pos++;
83 	else
84 		wildmat += '*'; //wildmats are anchored by default, while regexs are the opposite
85 	while (pos<repat.size()) {
86 		char c = repat[pos];
87 		++pos;
88 		if (c == '.') {
89 			if (pos<repat.size() && repat[pos] == '*') {
90 				wildmat += '*';
91 				++pos;
92 			}else
93 				wildmat += '?';
94 		}
95 		else if (c == '\\') {
96 			if (pos>=repat.size())
97 				throw RegexEx(Ex_INIT,"error converting regex(%s) to wildmat on char %i: %c", repat.c_str(), pos, c);
98 			char nc = repat[pos];
99 			if (nc == '*' || nc == '?' || nc == '[' || nc == ']') {
100 				wildmat += c;
101 				wildmat += nc;
102 				++pos;
103 			}
104 			else if (nc == '(' || nc == ')' || nc == '{' || nc == '}' || nc == '|' || nc == '.') {
105 				wildmat += nc;
106 				++pos;
107 			}
108 			else if (nc == '<' || nc == '>' || isalnum(nc))
109 				throw RegexEx(Ex_INIT,"error converting regex(%s) to wildmat on char %i: %c", repat.c_str(), pos, c);
110 			else {
111 				wildmat += nc;
112 				++pos;
113 			}
114 		}
115 		else if (c == '?' || c == '*' || c == '+' || c == '(' || c == ')' || c == '{' || c == '}' || c == '|')
116 			throw RegexEx(Ex_INIT,"error converting regex(%s) to wildmat on char %i: %c", repat.c_str(), pos, c);
117 		else if (pos==repat.size() && c == '$')
118 			break;//wildmats are anchored by default, while regexs are the opposite
119 		else if (ignorecase && isalpha(c)) {
120 			wildmat += '[';
121 			wildmat += tolower(c);
122 			wildmat += toupper(c);
123 			wildmat += ']';
124 		} else if (c == '[') {
125 			wildmat += '[';
126 			int nc = -1;
127 			unsigned int opos=pos;
128 			while (pos<repat.size()) {
129 				nc = repat[pos++];
130 				if (nc == ']' && opos+1!=pos){
131 					wildmat += ']';
132 					break;
133 				} else if (ignorecase && isalpha(nc)) {
134 					wildmat += tolower(nc);
135 					wildmat += toupper(nc);
136 				} else
137 					wildmat += nc;
138 			}
139 			if (nc!=']')
140 				throw RegexEx(Ex_INIT,"error converting regex(%s) to wildmat on char %i: %c", repat.c_str(), pos, nc);
141 		} else {
142 			wildmat += c;
143 		}
144 		if (pos==repat.size())
145 			wildmat += '*';//wildmats are anchored by default, while regexs are the opposite
146 	}
147 	//printf("converted %s->%s\n",repat.c_str(),wildmat.c_str());//######
148 	return wildmat;
149 }
150 
#ifndef HAVE_TIMEGM
// Fallback for platforms without timegm(): converts a broken-down UTC time
// into a time_t. The caller's struct is not modified.
time_t timegm (const struct tm *gmtimein) {
	/* The timegm manpage suggests a strategy of setting the TZ env var to ""
	 * and then running tzset(), mktime() and then resetting the TZ var to its
	 * previous value, but unfortunately it doesn't seem to work on all arches.
	 * So rather than try to figure out when it does we'll use this routine
	 * by Yitzchak Scott-Thoennes that should work on all arches.
	 * (found at http://ais.gmd.de/~veit/os2/mailinglist3/0863.html)
	 */
	struct tm scratch = *gmtimein; /* private copy we are free to modify */
	scratch.tm_isdst = 0; /* treat it as standard time */

	/* first interpret the fields as if they were local time */
	const time_t as_local = mktime(&scratch);

	/* then see where the UTC rendering of that instant lands when it is
	 * itself read back as local time; the gap is the gm/local offset */
	time_t probe = as_local;
	scratch = *gmtime(&probe);
	scratch.tm_isdst = 0;
	const time_t shifted = mktime(&scratch);

	/* correct the first answer by that offset */
	return as_local + (as_local - shifted);
}
#endif
176 
177 
// Formats *curtime into timestr using a strftime() format string.
//   timestr   - destination buffer
//   max       - size of the destination buffer in bytes
//   curtime   - the time to format
//   formatstr - strftime() format
//   local     - non-zero: format as local time; zero: format as UTC
// Returns the number of characters written (excluding the terminating NUL),
// as reported by strftime(). Returns 0 if the buffer is unusable or if the
// time cannot be broken down; in the latter case timestr is set to "".
size_t tconv(char * timestr, int max, time_t *curtime,const char * formatstr, int local) {
	// a non-positive max would turn into a huge size_t inside strftime()
	if (timestr==NULL || max<=0)
		return 0;
	struct tm *time_now = NULL;
	if (curtime!=NULL)
		time_now = local ? localtime(curtime) : gmtime(curtime);
	// localtime()/gmtime() return NULL when the value cannot be represented;
	// strftime() must not be handed a null tm pointer
	if (time_now==NULL || formatstr==NULL) {
		timestr[0]='\0';
		return 0;
	}
	return strftime(timestr,max,formatstr,time_now);
}
188 
// English three-letter month abbreviations, indexed 0 (Jan) .. 11 (Dec).
// The array is declared with 13 slots; the last one has no initializer and
// is therefore a null pointer.
const char *text_month[13]={"Jan", "Feb", "Mar", "Apr",
	"May", "Jun", "Jul", "Aug",
	"Sep", "Oct", "Nov", "Dec"
};

// Maps a textual month to its zero-based index.
// Only the first three characters of buf are compared, case-insensitively,
// so full names such as "July" are accepted as well.
// Returns 0..11 on success, -1 if buf does not start with a month name.
int decode_textmonth(const char * buf){
	int month=0;
	while (month<12 && strncasecmp(text_month[month],buf,3)!=0)
		++month;
	return month<12 ? month : -1;
}
// Converts a numeric timezone of the form "+HHMM" / "-HHMM" into an offset
// in seconds (positive for '+', negative for '-').
// Anything that does not start with a sign, including zone names such as
// "GMT", yields 0.
int decode_texttz(const char * buf){
	const char sign=*buf;
	if (sign!='-' && sign!='+')
		return 0;
	const int hhmm=atoi(buf+1);
	// hundreds and up are hours, the last two digits are minutes
	const int seconds=((hhmm/100)*60+(hhmm%100))*60;
	return sign=='-' ? -seconds : seconds;
}
211 
212 //Tue, 25 May 1999 06:23:23 GMT
213 //21 Jun 99 01:58:12
214 
215 
216 //Last-modified: Friday, 13-Nov-98 20:41:28 GMT
217 //012345678901234567890123456789
218 //Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
219 //23 May 1999 22:46:41 GMT ; no textual day
220 //25 May 99 01:01:48 +0100 ; 2 digit year
221 //21 Jun 99 01:58:12 ; no timezone
222 //Mon, 24 May 99 11:53:47 GMT ; 2 digit year
223 //3 Jun 1999 12:35:14 -0500 ; non padded day. blah.
224 //Tue, 1 Jun 1999 20:36:29 +0100 ; blah again
225 //Sun, 23 May 1999 19:34:35 -0500 ; ack, timezone
226 //12 July 1999 01:23:05 GMT // full length month
227 //Sun, 15 Aug 1999 19:56 +0100 (BST) // no seconds
228 //Sun, 7 Jul 2002 15:6:5 GMT //1 digit minutes, seconds
229 //Tue, 07 Aug 2002  0:21:00 GMT //1 digit hour with space pad
230 //Sun, 8 Sep 2002 0:19:2 GMT //1 digit hour, no pad
231 
232 //Sunday,
233 // 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
234 //Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format (note, this is used by ls -l --full-time)
235 //Jan  4 17:11 			;ls -l format, 17:11 may be replaced by " 1998"
236 
237 //easy format 06/11/94[ 23:34[:23][ -300]]
238 //c_regex rfc1123("^[A-Za-z, ]*([0-9]{1,2}) (...) ([0-9]{2,4}) ([0-9]{1,2}):([0-9]{2}):([0-9]{2}) (.*)$"),
239 //	rfc850("^[A-Za-z, ]*([0-9]{1,2})-(...)-([0-9]{2,4}) ([0-9]{1,2}):([0-9]{2}):([0-9]{2}) (.*)$"),
240 //#define TIME_REG "([0-9]{1,2}):([0-9]{2}):([0-9]{2})"
241 
242 //allows for optional seconds
243 #define TIME_REG2 "([0-9 ]?[0-9]):([0-9]{1,2})(:([0-9]{1,2}))?"
244 c_regex_r xrfc("^[A-Za-z, ]*([0-9]{1,2})[- ](.{3,9})[- ]([0-9]{2,4}) "TIME_REG2" *(.*)$"),
245 	xasctime("^[A-Za-z,]* *(...) +([0-9]{1,2}) "TIME_REG2" ([0-9]{2,4}) *(.*)$"),
246 	xlsl("^(...) +([0-9]{1,2}) +([0-9:]{4,5})$"),
247 	xeasy("^([0-9]{1,4})[-/]([0-9]{1,2})[-/]([0-9]{2,4})( *"TIME_REG2" *(.*))?$"),
248 	xiso("^([0-9]{4})-?([0-9]{2})-?([0-9]{2})([T ]?([0-9]{1,2})(:?([0-9]{2})(:?([0-9]{2}))?)?)? *([-+A-Z].*)?$");
249 c_regex_nosub xtime_t("^[0-9]+$")//seconds since 1970 (always gmt)
250 		;
decode_textdate(const char * cbuf,bool local)251 time_t decode_textdate(const char * cbuf, bool local){
252 	struct tm tblock;
253 	memset(&tblock,0,sizeof(struct tm));
254 	const char *tdt=NULL;
255 	int td_tz=local?0x7FFFFFFF:0;
256 	int yearlen=0;
257 	c_regex_subs rsubs;
258 	if (!xrfc.match(cbuf,&rsubs)){
259 		tdt="xrfc*-date";
260 		tblock.tm_mday=atoi(rsubs.subs(1));
261 		tblock.tm_mon=decode_textmonth(rsubs.subs(2));
262 		tblock.tm_year=atoi(rsubs.subs(3));
263 		yearlen=rsubs.sublen(3);
264 		tblock.tm_hour=atoi(rsubs.subs(4));
265 		tblock.tm_min=atoi(rsubs.subs(5));
266 		if(rsubs.sublen(6)>0)
267 			tblock.tm_sec=atoi(rsubs.subs(7));
268 		if(rsubs.sublen(8)>0)
269 			td_tz=decode_texttz(rsubs.subs(8));
270 	}else if (!xiso.match(cbuf,&rsubs)){
271 		tdt="iso";
272 		yearlen=rsubs.sublen(1);
273 		tblock.tm_year=atoi(rsubs.subs(1));
274 		tblock.tm_mon=atoi(rsubs.subs(2))-1;
275 		tblock.tm_mday=atoi(rsubs.subs(3));
276 		if(rsubs.sublen(4)>0){
277 			tblock.tm_hour=atoi(rsubs.subs(5));
278 			if(rsubs.sublen(6)>0){
279 				tblock.tm_min=atoi(rsubs.subs(7));
280 				if(rsubs.sublen(8)>0){
281 					tblock.tm_sec=atoi(rsubs.subs(9));
282 				}
283 			}
284 		}
285 		if(rsubs.sublen(10)>0)
286 			td_tz=decode_texttz(rsubs.subs(10));
287 	}else if (!xasctime.match(cbuf,&rsubs)){
288 		tdt="asctime-date";
289 		tblock.tm_mon=decode_textmonth(rsubs.subs(1));
290 		tblock.tm_mday=atoi(rsubs.subs(2));
291 		tblock.tm_hour=atoi(rsubs.subs(3));
292 		tblock.tm_min=atoi(rsubs.subs(4));
293 		if(rsubs.sublen(5)>0)
294 			tblock.tm_sec=atoi(rsubs.subs(6));
295 		tblock.tm_year=atoi(rsubs.subs(7));
296 		yearlen=rsubs.sublen(7);
297 	}else if (!xlsl.match(cbuf,&rsubs)){
298 		tdt="ls-l-date";
299 		tblock.tm_mon=decode_textmonth(rsubs.subs(1));
300 		tblock.tm_mday=atoi(rsubs.subs(2));
301 		if (rsubs.subs(3)[2]==':'){
302 			time_t curtime;
303 			time(&curtime);
304 			struct tm *lt = localtime(&curtime);
305 
306 			tblock.tm_hour=atoi(rsubs.subs(3));
307 			tblock.tm_min=atoi(rsubs.subs(3)+3);
308 
309 			if (lt->tm_mon>=tblock.tm_mon)
310 				tblock.tm_year=lt->tm_year;
311 			else
312 				tblock.tm_year=lt->tm_year-1;
313 		}else{
314 			yearlen=rsubs.sublen(3);
315 			tblock.tm_year=atoi(rsubs.subs(3));
316 		}
317 	}else if (!xeasy.match(cbuf,&rsubs)){
318 		tdt="easy-date";
319 		int a=atoi(rsubs.subs(1)),b=atoi(rsubs.subs(2)),c=atoi(rsubs.subs(3));
320 		if((rsubs.sublen(1)>2 || a>12 || a<=0) && (b>=1 && b<=12 && c>=1 && c<=31)){
321 			//year/mon/day format...
322 			tblock.tm_mon=b-1;
323 			tblock.tm_mday=c;
324 			tblock.tm_year=a;
325 			yearlen=rsubs.sublen(1);
326 		}else{
327 			//mon/day/year format...
328 			tblock.tm_mon=a-1;
329 			tblock.tm_mday=b;
330 			tblock.tm_year=c;
331 			yearlen=rsubs.sublen(3);
332 		}
333 		if(rsubs.sublen(4)>0){
334 			tblock.tm_hour=atoi(rsubs.subs(5));
335 			tblock.tm_min=atoi(rsubs.subs(6));
336 			if(rsubs.sublen(7)>0){
337 				tblock.tm_sec=atoi(rsubs.subs(8));
338 			}
339 			if(rsubs.sublen(9)>0)
340 				td_tz=decode_texttz(rsubs.subs(9));
341 		}
342 	}else if (!xtime_t.match(cbuf)){
343 		tdt="time_t-date";
344 		return atol(cbuf);
345 	}
346 	if(yearlen>=4)
347 		tblock.tm_year-=1900;
348 	else if(yearlen==2 && tblock.tm_year<70)
349 		tblock.tm_year+=100;//assume anything before (19)70 is 20xx
350 	if(!tdt){
351 		PERROR("decode_textdate: unknown %s",cbuf);
352 		return 0;
353 	}else
354 		PDEBUG(DEBUG_ALL,"decode_textdate: %s %i %i %i %i %i %i %i",tdt,tblock.tm_year,tblock.tm_mon,tblock.tm_mday,tblock.tm_hour,tblock.tm_min,tblock.tm_sec,td_tz);
355 	if (local && td_tz==0x7FFFFFFF){//if local=1 and time string didn't contain a timezone, just use mktime directly.
356 		tblock.tm_isdst = -1;
357 		return mktime(&tblock);
358 	}else
359 		return timegm(&tblock)-td_tz;
360 }
361 
362 c_regex_r xduration("^ *([0-9]+ *ye?a?r?s?)? *([0-9]+ *mon?t?h?s?)? *([0-9]+ *we?e?k?s?)? *([0-9]+ *da?y?s?)? *([0-9]+ *ho?u?r?s?)? *([0-9]+ *mi?n?u?t?e?s?)? *([0-9]+ *se?c?o?n?d?s?)? *$", REG_ICASE|REG_EXTENDED);
decode_textage(const char * cbuf)363 time_t decode_textage(const char *cbuf) {
364 	time_t now=time(NULL);
365 	struct tm tblock = *localtime(&now);
366 	c_regex_subs rsubs;
367 	if (!xduration.match(cbuf,&rsubs)){
368 //		if(rsubs.sublen(1)>0)
369 //			age+=atol(rsubs.subs(1))*31556952; //365.2425*24*60*60
370 		if(rsubs.sublen(1)>0)
371 			tblock.tm_year-=atol(rsubs.subs(1));
372 		if(rsubs.sublen(2)>0)
373 			tblock.tm_mon-=atol(rsubs.subs(2));
374 		if(rsubs.sublen(3)>0)
375 			tblock.tm_mday-=atol(rsubs.subs(3))*7;
376 		if(rsubs.sublen(4)>0)
377 			tblock.tm_mday-=atol(rsubs.subs(4));
378 		if(rsubs.sublen(5)>0)
379 			tblock.tm_hour-=atol(rsubs.subs(5));
380 		if(rsubs.sublen(6)>0)
381 			tblock.tm_min-=atol(rsubs.subs(6));
382 		if(rsubs.sublen(7)>0)
383 			tblock.tm_sec-=atol(rsubs.subs(7));
384 	}else {
385 		PERROR("decode_textage: unknown %s",cbuf);
386 		return 0;
387 	}
388 	//return now - mktime(&tblock);
389 	return mktime(&tblock);
390 }
391 
filecompare(const char * old_fn,const char * nfn)392 int filecompare(const char *old_fn,const char *nfn){
393 	off_t old_size, new_size;
394 	if (!fsize(old_fn,&old_size) && !fsize(nfn, &new_size) && old_size!=new_size)
395 		return 0;
396 	c_file_fd old_f(old_fn, O_RDONLY|O_BINARY);
397 	c_file_fd new_f(nfn, O_RDONLY|O_BINARY);
398 	char	old_buf[4096], new_buf[4096];
399 	int	old_len, new_len;
400 	// read and compare the files
401 	while(1){
402 		old_len=old_f.read(old_buf, 4096);
403 		new_len=new_f.read(new_buf, 4096);
404 		if (old_len == new_len){
405 			if (old_len == 0){
406 				return 1;
407 			}
408 			if (memcmp(old_buf, new_buf, old_len)){
409 				return 0;
410 			}
411 		} else {
412 			return 0;
413 		}
414 	}
415 }
416 
417