1 /*
2 misc.* - misc functions
3 Copyright (C) 1999-2004 Matthew Mueller <donut AT dakotacom.net>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 #include "misc.h"
23 #include "strreps.h"
24 #include "log.h"
25 #include "file.h"
26 #include "path.h"
27
28 #include <stdlib.h>
29 #include <unistd.h>
30 #include <string.h>
31 #include "myregex.h"
32
33 #include "_sstream.h"
34 #include <iomanip>
35
36
const char hexchar[] = "0123456789abcdef";
// Render every byte of s as two lowercase hexadecimal digits
// (high nibble first) and return the concatenation.
string hexstr(const string &s){
	string ret;
	for (string::size_type n=0; n<s.size(); ++n) {
		const uchar byte=s[n];
		ret.append(1, hexchar[byte>>4]);  // upper four bits
		ret.append(1, hexchar[byte&15]);  // lower four bits
	}
	return ret;
}
47
parsestr_valcheck(const string & val,bool is_signed)48 void parsestr_valcheck(const string &val, bool is_signed) {
49 if (val.empty())
50 throw parse_error("empty val");
51 if (!is_signed && val.find('-')!=string::npos)
52 throw parse_error("invalid unsigned value");
53 }
parsestr_isscheck(istringstream & iss)54 void parsestr_isscheck(istringstream &iss) {
55 if (iss.fail() || iss.bad())
56 throw parse_error("invalid value");
57 if (!iss.eof() && iss.peek()!=EOF)
58 throw parse_error("trailing junk");
59 }
60
// Return a copy of s converted by lowerstr(); s itself is left untouched.
string strtolower(const string &s){
	string lowered(s);
	lowerstr(lowered);
	return lowered;
}
66
// Lowercase s in place, one byte at a time, using tolower().
//
// Each character is converted to unsigned char before being handed to
// tolower(): passing a negative plain char (any byte >= 0x80 on platforms
// where char is signed) to the <ctype.h> functions is undefined behaviour.
void lowerstr(string &s){
	for (string::iterator i=s.begin(); i!=s.end(); ++i)
		*i=static_cast<char>(tolower(static_cast<unsigned char>(*i)));
}
71
// True when the first t.size() characters of s are exactly t.
// An empty t is a prefix of every string; a t longer than s never matches.
bool strstartswith(const string &s, const string &t) {
	const string::size_type prefix_len = t.size();
	return s.compare(0, prefix_len, t) == 0;
}
75
// Convert a (simple) regular expression into an equivalent wildmat pattern.
//
// repat      - the regex source text.
// ignorecase - when true, every alphabetic character is expanded so that
//              both its lower and upper case forms match.
// Returns the wildmat string; an empty regex yields "*".
// Throws RegexEx(Ex_INIT, ...) when the regex uses a construct that has no
// wildmat equivalent (unescaped ? * + ( ) { } |, \< \> or \<alnum> escapes,
// a trailing backslash, or an unterminated [...] class).
//
// Regexes are unanchored unless they use ^ / $, whereas wildmats are always
// anchored, so a '*' is added at whichever end the regex leaves open.
string regex2wildmat(const string &repat, bool ignorecase){
	if (repat.empty())
		return "*";
	string wildmat;
	unsigned int pos=0;
	bool end_anchored=false; // set when the regex ends with an unescaped '$'
	if (repat[0]=='^')
		pos++;
	else
		wildmat += '*'; //wildmats are anchored by default, while regexs are the opposite
	while (pos<repat.size()) {
		char c = repat[pos];
		++pos;
		if (c == '.') {
			// ".*" -> "*", a lone "." -> "?"
			if (pos<repat.size() && repat[pos] == '*') {
				wildmat += '*';
				++pos;
			}else
				wildmat += '?';
		}
		else if (c == '\\') {
			if (pos>=repat.size())
				throw RegexEx(Ex_INIT,"error converting regex(%s) to wildmat on char %i: %c", repat.c_str(), pos, c);
			char nc = repat[pos];
			if (nc == '*' || nc == '?' || nc == '[' || nc == ']') {
				// special in wildmat too: keep the backslash
				wildmat += c;
				wildmat += nc;
				++pos;
			}
			else if (nc == '(' || nc == ')' || nc == '{' || nc == '}' || nc == '|' || nc == '.') {
				// special only in regex: emit the bare character
				wildmat += nc;
				++pos;
			}
			// ctype functions require a value representable as unsigned char
			else if (nc == '<' || nc == '>' || isalnum(static_cast<unsigned char>(nc)))
				throw RegexEx(Ex_INIT,"error converting regex(%s) to wildmat on char %i: %c", repat.c_str(), pos, c);
			else {
				wildmat += nc;
				++pos;
			}
		}
		else if (c == '?' || c == '*' || c == '+' || c == '(' || c == ')' || c == '{' || c == '}' || c == '|')
			throw RegexEx(Ex_INIT,"error converting regex(%s) to wildmat on char %i: %c", repat.c_str(), pos, c);
		else if (pos==repat.size() && c == '$') {
			end_anchored=true;//wildmats are anchored by default, while regexs are the opposite
			break;
		}
		else if (ignorecase && isalpha(static_cast<unsigned char>(c))) {
			wildmat += '[';
			wildmat += static_cast<char>(tolower(static_cast<unsigned char>(c)));
			wildmat += static_cast<char>(toupper(static_cast<unsigned char>(c)));
			wildmat += ']';
		} else if (c == '[') {
			wildmat += '[';
			int nc = -1;
			bool closed = false; // becomes true once the terminating ']' is seen
			unsigned int opos=pos;
			while (pos<repat.size()) {
				nc = repat[pos++];
				// a ']' directly after '[' is a literal member, not the terminator
				if (nc == ']' && opos+1!=pos){
					wildmat += ']';
					closed = true;
					break;
				} else if (ignorecase && isalpha(static_cast<unsigned char>(nc))) {
					wildmat += static_cast<char>(tolower(static_cast<unsigned char>(nc)));
					wildmat += static_cast<char>(toupper(static_cast<unsigned char>(nc)));
				} else
					wildmat += static_cast<char>(nc);
			}
			// Track closure explicitly: checking the last character read would
			// wrongly accept "[]", where the only ']' is a literal member.
			if (!closed)
				throw RegexEx(Ex_INIT,"error converting regex(%s) to wildmat on char %i: %c", repat.c_str(), pos, nc);
		} else {
			wildmat += c;
		}
	}
	// Done after the loop (rather than on its last iteration) so that a regex
	// consisting solely of "^" also becomes "*" instead of an empty wildmat.
	if (!end_anchored)
		wildmat += '*';//wildmats are anchored by default, while regexs are the opposite
	//printf("converted %s->%s\n",repat.c_str(),wildmat.c_str());//######
	return wildmat;
}
150
#ifndef HAVE_TIMEGM
/* Fallback timegm() for platforms that lack one: interpret *gmtimein as a
 * broken-down UTC time and return the matching time_t.  The input is not
 * modified.
 *
 * The timegm manpage suggests setting the TZ env var to "", calling tzset()
 * and mktime(), then restoring TZ, but unfortunately that does not seem to
 * work on all arches.  Rather than work out where it does, this uses the
 * routine by Yitzchak Scott-Thoennes, which should work on all arches.
 * (found at http://ais.gmd.de/~veit/os2/mailinglist3/0863.html)
 */
time_t timegm (const struct tm *gmtimein) {
	struct tm scratch = *gmtimein;     /* private copy we are free to modify */
	scratch.tm_isdst = 0;              /* treat it as standard time */

	/* First pretend the fields describe a local time. */
	const time_t as_local = mktime(&scratch);

	/* Round-trip that instant through gmtime()/mktime(); the amount it moves
	 * by is the difference between gm and local time. */
	time_t probe = as_local;
	scratch = *gmtime(&probe);
	scratch.tm_isdst = 0;
	const time_t shifted = mktime(&scratch);

	/* Adjust the local-time answer by that difference. */
	return as_local + (as_local - shifted);
}
#endif
176
177
// Format *curtime into timestr using strftime().
//
// timestr   - destination buffer.
// max       - size of timestr in bytes.
// curtime   - the time to format.
// formatstr - strftime() format string.
// local     - non-zero: break the time down with localtime();
//             zero: use gmtime().
// Returns the value of strftime() (number of bytes written, excluding the
// terminating NUL), or 0 when nothing could be formatted.
size_t tconv(char * timestr, int max, time_t *curtime,const char * formatstr, int local) {
	// A non-positive size would be converted to a huge size_t by strftime().
	if (max<=0)
		return 0;
	struct tm *time_now = local ? localtime(curtime) : gmtime(curtime);
	// localtime()/gmtime() return NULL when the time cannot be represented;
	// passing that on to strftime() would be undefined behaviour.
	if (!time_now) {
		timestr[0] = '\0';
		return 0;
	}
	return strftime(timestr,max,formatstr,time_now);
}
188
// Three-letter English month abbreviations, indexed 0 (Jan) to 11 (Dec).
// The array is declared with 13 slots; only the first 12 are initialised.
const char *text_month[13]={"Jan", "Feb", "Mar", "Apr",
	"May", "Jun", "Jul", "Aug",
	"Sep", "Oct", "Nov", "Dec"
};

// Map the leading three characters of buf (compared case-insensitively) to a
// month index 0..11.  Returns -1 when no abbreviation matches.
int decode_textmonth(const char * buf){
	int idx=0;
	while (idx<12 && strncasecmp(text_month[idx],buf,3)!=0)
		++idx;
	return idx<12 ? idx : -1;
}
// Decode a numeric timezone of the form +HHMM / -HHMM into an offset in
// seconds (negative for '-').  Anything that does not start with '+' or '-'
// (for example a zone name) yields 0.
int decode_texttz(const char * buf){
	const char sign = *buf;
	if (sign!='-' && sign!='+')
		return 0;
	const int hhmm = atoi(buf+1);
	const int seconds = ((hhmm/100)*60+(hhmm%100))*60; // hours and minutes -> seconds
	return (sign=='-') ? -seconds : seconds;
}
211
212 //Tue, 25 May 1999 06:23:23 GMT
213 //21 Jun 99 01:58:12
214
215
216 //Last-modified: Friday, 13-Nov-98 20:41:28 GMT
217 //012345678901234567890123456789
218 //Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
219 //23 May 1999 22:46:41 GMT ; no textual day
220 //25 May 99 01:01:48 +0100 ; 2 digit year
221 //21 Jun 99 01:58:12 ; no timezone
222 //Mon, 24 May 99 11:53:47 GMT ; 2 digit year
223 //3 Jun 1999 12:35:14 -0500 ; non padded day. blah.
224 //Tue, 1 Jun 1999 20:36:29 +0100 ; blah again
225 //Sun, 23 May 1999 19:34:35 -0500 ; ack, timezone
226 //12 July 1999 01:23:05 GMT // full length month
227 //Sun, 15 Aug 1999 19:56 +0100 (BST) // no seconds
228 //Sun, 7 Jul 2002 15:6:5 GMT //1 digit minutes, seconds
229 //Tue, 07 Aug 2002 0:21:00 GMT //1 digit hour with space pad
230 //Sun, 8 Sep 2002 0:19:2 GMT //1 digit hour, no pad
231
232 //Sunday,
233 // 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
234 //Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format (note, this is used by ls -l --full-time)
235 //Jan 4 17:11 ;ls -l format, 17:11 may be replaced by " 1998"
236
237 //easy format 06/11/94[ 23:34[:23][ -300]]
238 //c_regex rfc1123("^[A-Za-z, ]*([0-9]{1,2}) (...) ([0-9]{2,4}) ([0-9]{1,2}):([0-9]{2}):([0-9]{2}) (.*)$"),
239 // rfc850("^[A-Za-z, ]*([0-9]{1,2})-(...)-([0-9]{2,4}) ([0-9]{1,2}):([0-9]{2}):([0-9]{2}) (.*)$"),
240 //#define TIME_REG "([0-9]{1,2}):([0-9]{2}):([0-9]{2})"
241
242 //allows for optional seconds
243 #define TIME_REG2 "([0-9 ]?[0-9]):([0-9]{1,2})(:([0-9]{1,2}))?"
244 c_regex_r xrfc("^[A-Za-z, ]*([0-9]{1,2})[- ](.{3,9})[- ]([0-9]{2,4}) "TIME_REG2" *(.*)$"),
245 xasctime("^[A-Za-z,]* *(...) +([0-9]{1,2}) "TIME_REG2" ([0-9]{2,4}) *(.*)$"),
246 xlsl("^(...) +([0-9]{1,2}) +([0-9:]{4,5})$"),
247 xeasy("^([0-9]{1,4})[-/]([0-9]{1,2})[-/]([0-9]{2,4})( *"TIME_REG2" *(.*))?$"),
248 xiso("^([0-9]{4})-?([0-9]{2})-?([0-9]{2})([T ]?([0-9]{1,2})(:?([0-9]{2})(:?([0-9]{2}))?)?)? *([-+A-Z].*)?$");
249 c_regex_nosub xtime_t("^[0-9]+$")//seconds since 1970 (always gmt)
250 ;
// Parse a textual date/time in one of several formats and return it as a
// time_t.
//
// cbuf  - the text to parse.
// local - when true and the text carries no timezone, the fields are
//         interpreted via mktime(); otherwise they are converted with
//         timegm() and the parsed timezone offset (0 if none) is subtracted.
// Returns the decoded time, or 0 (after logging through PERROR) when none of
// the patterns match.
//
// The patterns are tried in order: xrfc, xiso, xasctime, xlsl, xeasy, xtime_t.
// A zero return from match() is treated as a successful match.
time_t decode_textdate(const char * cbuf, bool local){
	struct tm tblock;
	memset(&tblock,0,sizeof(struct tm));
	const char *tdt=NULL; // name of the format that matched; NULL = nothing matched
	int td_tz=local?0x7FFFFFFF:0; // 0x7FFFFFFF is the "no timezone seen" marker tested below
	int yearlen=0; // number of digits the year was written with (0 = year not taken from the text)
	c_regex_subs rsubs;
	if (!xrfc.match(cbuf,&rsubs)){
		// groups: 1 day, 2 month name, 3 year, 4 hour, 5 minute,
		// 6 optional ":seconds", 7 seconds, 8 trailing timezone text
		tdt="xrfc*-date";
		tblock.tm_mday=atoi(rsubs.subs(1));
		tblock.tm_mon=decode_textmonth(rsubs.subs(2));
		tblock.tm_year=atoi(rsubs.subs(3));
		yearlen=rsubs.sublen(3);
		tblock.tm_hour=atoi(rsubs.subs(4));
		tblock.tm_min=atoi(rsubs.subs(5));
		if(rsubs.sublen(6)>0)
			tblock.tm_sec=atoi(rsubs.subs(7));
		if(rsubs.sublen(8)>0)
			td_tz=decode_texttz(rsubs.subs(8));
	}else if (!xiso.match(cbuf,&rsubs)){
		// groups: 1 year, 2 month, 3 day, 4 optional time part, 5 hour,
		// 6 optional minutes part, 7 minute, 8 optional seconds part,
		// 9 seconds, 10 timezone text
		tdt="iso";
		yearlen=rsubs.sublen(1);
		tblock.tm_year=atoi(rsubs.subs(1));
		tblock.tm_mon=atoi(rsubs.subs(2))-1; // tm_mon is zero-based
		tblock.tm_mday=atoi(rsubs.subs(3));
		if(rsubs.sublen(4)>0){
			tblock.tm_hour=atoi(rsubs.subs(5));
			if(rsubs.sublen(6)>0){
				tblock.tm_min=atoi(rsubs.subs(7));
				if(rsubs.sublen(8)>0){
					tblock.tm_sec=atoi(rsubs.subs(9));
				}
			}
		}
		if(rsubs.sublen(10)>0)
			td_tz=decode_texttz(rsubs.subs(10));
	}else if (!xasctime.match(cbuf,&rsubs)){
		// groups: 1 month name, 2 day, 3 hour, 4 minute, 5 optional ":seconds",
		// 6 seconds, 7 year.  The trailing group after the year is not
		// consulted here, so td_tz keeps its initial value.
		tdt="asctime-date";
		tblock.tm_mon=decode_textmonth(rsubs.subs(1));
		tblock.tm_mday=atoi(rsubs.subs(2));
		tblock.tm_hour=atoi(rsubs.subs(3));
		tblock.tm_min=atoi(rsubs.subs(4));
		if(rsubs.sublen(5)>0)
			tblock.tm_sec=atoi(rsubs.subs(6));
		tblock.tm_year=atoi(rsubs.subs(7));
		yearlen=rsubs.sublen(7);
	}else if (!xlsl.match(cbuf,&rsubs)){
		// groups: 1 month name, 2 day, 3 either "HH:MM" or a year
		tdt="ls-l-date";
		tblock.tm_mon=decode_textmonth(rsubs.subs(1));
		tblock.tm_mday=atoi(rsubs.subs(2));
		if (rsubs.subs(3)[2]==':'){
			// "HH:MM" form: no year in the text, so derive it from the current date
			time_t curtime;
			time(&curtime);
			struct tm *lt = localtime(&curtime);

			tblock.tm_hour=atoi(rsubs.subs(3));
			tblock.tm_min=atoi(rsubs.subs(3)+3); // minutes start after "HH:"

			// a month later than the current one is taken to be in the previous year
			if (lt->tm_mon>=tblock.tm_mon)
				tblock.tm_year=lt->tm_year;
			else
				tblock.tm_year=lt->tm_year-1;
			// yearlen stays 0 here, so the year fix-up below leaves tm_year alone
		}else{
			yearlen=rsubs.sublen(3);
			tblock.tm_year=atoi(rsubs.subs(3));
		}
	}else if (!xeasy.match(cbuf,&rsubs)){
		// groups: 1-3 the three date numbers, 4 optional time part, 5 hour,
		// 6 minute, 7 optional ":seconds", 8 seconds, 9 timezone text
		tdt="easy-date";
		int a=atoi(rsubs.subs(1)),b=atoi(rsubs.subs(2)),c=atoi(rsubs.subs(3));
		// The first number is taken as the year when it cannot be a month
		// (more than two digits, or outside 1..12) and the other two are a
		// plausible month and day.
		if((rsubs.sublen(1)>2 || a>12 || a<=0) && (b>=1 && b<=12 && c>=1 && c<=31)){
			//year/mon/day format...
			tblock.tm_mon=b-1;
			tblock.tm_mday=c;
			tblock.tm_year=a;
			yearlen=rsubs.sublen(1);
		}else{
			//mon/day/year format...
			tblock.tm_mon=a-1;
			tblock.tm_mday=b;
			tblock.tm_year=c;
			yearlen=rsubs.sublen(3);
		}
		if(rsubs.sublen(4)>0){
			tblock.tm_hour=atoi(rsubs.subs(5));
			tblock.tm_min=atoi(rsubs.subs(6));
			if(rsubs.sublen(7)>0){
				tblock.tm_sec=atoi(rsubs.subs(8));
			}
			if(rsubs.sublen(9)>0)
				td_tz=decode_texttz(rsubs.subs(9));
		}
	}else if (!xtime_t.match(cbuf)){
		// all digits: the text already is a seconds-since-1970 value
		tdt="time_t-date";
		return atol(cbuf);
	}
	// Convert the parsed year to tm_year's years-since-1900 convention.
	if(yearlen>=4)
		tblock.tm_year-=1900;
	else if(yearlen==2 && tblock.tm_year<70)
		tblock.tm_year+=100;//assume anything before (19)70 is 20xx
	if(!tdt){
		PERROR("decode_textdate: unknown %s",cbuf);
		return 0;
	}else
		PDEBUG(DEBUG_ALL,"decode_textdate: %s %i %i %i %i %i %i %i",tdt,tblock.tm_year,tblock.tm_mon,tblock.tm_mday,tblock.tm_hour,tblock.tm_min,tblock.tm_sec,td_tz);
	if (local && td_tz==0x7FFFFFFF){//if local=1 and time string didn't contain a timezone, just use mktime directly.
		tblock.tm_isdst = -1; // -1 asks mktime to determine whether DST applies
		return mktime(&tblock);
	}else
		return timegm(&tblock)-td_tz;
}
361
362 c_regex_r xduration("^ *([0-9]+ *ye?a?r?s?)? *([0-9]+ *mon?t?h?s?)? *([0-9]+ *we?e?k?s?)? *([0-9]+ *da?y?s?)? *([0-9]+ *ho?u?r?s?)? *([0-9]+ *mi?n?u?t?e?s?)? *([0-9]+ *se?c?o?n?d?s?)? *$", REG_ICASE|REG_EXTENDED);
decode_textage(const char * cbuf)363 time_t decode_textage(const char *cbuf) {
364 time_t now=time(NULL);
365 struct tm tblock = *localtime(&now);
366 c_regex_subs rsubs;
367 if (!xduration.match(cbuf,&rsubs)){
368 // if(rsubs.sublen(1)>0)
369 // age+=atol(rsubs.subs(1))*31556952; //365.2425*24*60*60
370 if(rsubs.sublen(1)>0)
371 tblock.tm_year-=atol(rsubs.subs(1));
372 if(rsubs.sublen(2)>0)
373 tblock.tm_mon-=atol(rsubs.subs(2));
374 if(rsubs.sublen(3)>0)
375 tblock.tm_mday-=atol(rsubs.subs(3))*7;
376 if(rsubs.sublen(4)>0)
377 tblock.tm_mday-=atol(rsubs.subs(4));
378 if(rsubs.sublen(5)>0)
379 tblock.tm_hour-=atol(rsubs.subs(5));
380 if(rsubs.sublen(6)>0)
381 tblock.tm_min-=atol(rsubs.subs(6));
382 if(rsubs.sublen(7)>0)
383 tblock.tm_sec-=atol(rsubs.subs(7));
384 }else {
385 PERROR("decode_textage: unknown %s",cbuf);
386 return 0;
387 }
388 //return now - mktime(&tblock);
389 return mktime(&tblock);
390 }
391
filecompare(const char * old_fn,const char * nfn)392 int filecompare(const char *old_fn,const char *nfn){
393 off_t old_size, new_size;
394 if (!fsize(old_fn,&old_size) && !fsize(nfn, &new_size) && old_size!=new_size)
395 return 0;
396 c_file_fd old_f(old_fn, O_RDONLY|O_BINARY);
397 c_file_fd new_f(nfn, O_RDONLY|O_BINARY);
398 char old_buf[4096], new_buf[4096];
399 int old_len, new_len;
400 // read and compare the files
401 while(1){
402 old_len=old_f.read(old_buf, 4096);
403 new_len=new_f.read(new_buf, 4096);
404 if (old_len == new_len){
405 if (old_len == 0){
406 return 1;
407 }
408 if (memcmp(old_buf, new_buf, old_len)){
409 return 0;
410 }
411 } else {
412 return 0;
413 }
414 }
415 }
416
417