1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4 
5 #include "ppport.h"
6 
7 #include "const-c.inc"
8 
9 #include "locatedb.h"
10 
11 #include <config.h>
12 #include <stdio.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <time.h>
16 #include <fnmatch.h>
17 #include <regex.h>
18 #include <unistd.h>
19 #include <fcntl.h>
20 
21 #define NDEBUG
22 #include <assert.h>
23 
24 #ifdef STDC_HEADERS
25 #include <stdlib.h>
26 #else
27 char *getenv ();
28 #endif
29 
30 #ifdef STDC_HEADERS
31 #include <errno.h>
32 #include <stdlib.h>
33 #else
34 extern int errno;
35 #endif
36 
37 
38 #define WARNING	    0
39 #define MIN_CHUNK   64
40 #define MIN_BLK	    4096
41 #define ALLOC_SIZE  4096
42 #define SLOC_ESC    -0x80
43 
44 #ifndef call_sv
45 #   define call_sv perl_call_sv
46 #endif
47 
48 uid_t UID;
49 gid_t GID;
50 
51 // typedef enum {false, true} boolean;
52 #include <stdbool.h>
53 
last_literal_end(char * name)54 static char * last_literal_end (char *name) {
55     static char *globfree = NULL;	/* A copy of the subpattern in NAME.  */
56     static size_t gfalloc = 0;	    /* Bytes allocated for `globfree'.  */
57     register char *subp;		    /* Return value.  */
58     register char *p;		        /* Search location in NAME.  */
59 
60     /* Find the end of the subpattern.
61      Skip trailing metacharacters and [] ranges. */
62     for (p = name + strlen (name) - 1;
63          p >= name && strchr ("*?]", *p) != NULL;
64          p--) {
65 
66         if (*p == ']')
67             while (p >= name && *p != '[')
68                 p--;
69     }
70 
71     if (p < name)
72         p = name;
73 
74     if (p - name + 3 > gfalloc) {
75         gfalloc = p - name + 3 + 64; /* Room to grow.  */
76 	Renew(globfree, gfalloc, char);
77     }
78 
79     subp = globfree;
80     *subp++ = '\0';
81 
82     /* If the pattern has only metacharacters, make every path match the
83      subpattern, so it gets checked the slow way.  */
84     if (p == name && strchr ("?*[]", *p) != NULL)
85         *subp++ = '/';
86     else {
87         char *endmark;
88         /* Find the start of the metacharacter-free subpattern.  */
89         for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
90             ;
91         /* Copy the subpattern into globfree.  */
92         for (++p; p <= endmark; )
93             *subp++ = *p++;
94     }
95 
96     *subp-- = '\0';		/* Null terminate, though it's not needed.  */
97 
98     return subp;
99 }
100 
getstr(char ** lineptr,size_t * n,FILE * stream,char terminator,int offset)101 int getstr (char **lineptr, size_t *n, FILE *stream,
102             char terminator, int offset) {
103     int nchars_avail;		/* Allocated but unused chars in *LINEPTR.  */
104     char *read_pos;		/* Where we're reading into *LINEPTR. */
105     int ret;
106 
107     if (!lineptr || !n || !stream)
108         return -1;
109 
110     if (!*lineptr) {
111         *n = MIN_CHUNK;
112 	New(0, *lineptr, *n, char);
113         if (!*lineptr)
114             return -1;
115     }
116 
117     nchars_avail = *n - offset;
118     read_pos = *lineptr + offset;
119 
120     for (;;) {
121         register int c = getc (stream);
122 
123         /* We always want at least one char left in the buffer, since we
124            always (unless we get an error while reading the first char)
125            NULL-terminate the line buffer.  */
126 
127         assert(*n - nchars_avail == read_pos - *lineptr);
128         if (nchars_avail < 1) {
129             if (*n > MIN_CHUNK)
130                 *n *= 2;
131             else
132                 *n += MIN_CHUNK;
133 
134             nchars_avail = *n + *lineptr - read_pos;
135             Renew(*lineptr, *n, char);
136             if (!*lineptr)
137                 return -1;
138             read_pos = *n - nchars_avail + *lineptr;
139             assert(*n - nchars_avail == read_pos - *lineptr);
140         }
141 
142         if (c == EOF || ferror (stream)) {
143             /* Return partial line, if any.  */
144             if (read_pos == *lineptr)
145                 return -1;
146             else
147                 break;
148         }
149 
150         *read_pos++ = c;
151         nchars_avail--;
152 
153         if (c == terminator)
154             /* Return the line.  */
155             break;
156     }
157 
158     /* Done - NUL terminate and return the number of chars read.  */
159     *read_pos = '\0';
160 
161     ret = read_pos - (*lineptr + offset);
162     return ret;
163 }
164 
/*
 * Read a two-byte big-endian value from FP and return it sign-extended
 * from 16 bits (range -32768..32767).
 *
 * Returns 0 if two bytes could not be read.
 */
static int get_short (FILE *fp) {
    unsigned char raw[2];
    unsigned int value;

    /* Do not decode an uninitialised buffer on a short read.  */
    if (fread (raw, 1, sizeof raw, fp) != sizeof raw)
        return 0;

    /* Assemble with unsigned arithmetic: shifting a negative (signed
       char) value left is undefined, and where plain char is unsigned
       the sign would otherwise be lost.  */
    value = ((unsigned int) raw[0] << 8) | raw[1];

    return (value & 0x8000u) ? (int) value - 0x10000 : (int) value;
}
170 
/*
 * Decode a two-byte big-endian value from the buffer at *FP, advance *FP
 * past both bytes, and return the value sign-extended from 16 bits.
 *
 * The caller must guarantee that two bytes are readable at *FP.
 */
static int s_get_short (char **fp) {
    const unsigned char *bytes = (const unsigned char *) *fp;
    /* Unsigned arithmetic avoids left-shifting a negative value and makes
       the result independent of whether plain char is signed.  */
    unsigned int value = ((unsigned int) bytes[0] << 8) | bytes[1];

    *fp += 2;

    return (value & 0x8000u) ? (int) value - 0x10000 : (int) value;
}
178 
check_path_access(char * codedpath)179 int check_path_access(char *codedpath) {
180     char *dir = NULL;
181     char *path = NULL;
182     int res;
183     char *str_ptr;
184 
185     if (access(codedpath, R_OK) != 0) {
186 	Safefree(codedpath);
187 	return 0;
188     }
189 
190     New(0, path, strlen(codedpath)+1, char);
191     *path = 0;
192 
193     res = 1;
194     str_ptr = codedpath;
195 
196     while ((dir = strtok(str_ptr, "/"))) {
197 	strcat(path,"/");
198 	strcat(path,dir);
199 	if (access(path, R_OK) != 0) {
200 	    res = 0;
201 	    break;
202 	}
203 	str_ptr = NULL;
204     }
205 
206     Safefree(codedpath);
207 
208     Safefree(path);
209 
210     return res;
211 }
212 
/*
 * Invoke the Perl callback CODEREF with no arguments and with $_ set to a
 * temporary copy of the NUL-terminated string PATH.  Any return value of
 * the callback is discarded (G_DISCARD).
 *
 * $_ is localised for the duration of the call and restored on return.
 */
void call_coderef (SV *coderef, char *path) {
    dSP;

    ENTER;
    SAVETMPS;

    /* FIXME We aren't yet prepared for lexical $_ as coming in 5.9.1 */
    /* The save must come AFTER ENTER so that the matching LEAVE below
       restores $_.  Saved before ENTER, the entry belongs to the caller's
       scope: $_ would be left pointing at the mortal freed by FREETMPS,
       and one savestack entry would pile up per call.  */
    SAVESPTR(DEFSV);

    PUSHMARK(SP);
    DEFSV = sv_2mortal(newSVpvn(path, strlen(path)));
    PUTBACK;
    (void) call_sv(coderef, G_DISCARD);

    FREETMPS;
    LEAVE;
}
229 #define WARN fprintf(stderr, "%i\n", __LINE__);
230 
231 MODULE = File::Locate		PACKAGE = File::Locate
232 
233 INCLUDE: const-xs.inc
234 
235 BOOT:
236     {
237 	UID = getuid();
238 	GID = getgid();
239     }
240 
void
_locate (pathpart, ...)
        char *pathpart;
    PROTOTYPE: DISABLE
    PREINIT:
        /* Search a GNU locate database for entries matching PATHPART.
         *
         * Arguments after PATHPART, in any order:
         *   - a code reference: called once per match with $_ set to the
         *     matching path (see call_coderef);
         *   - "-rex", VALUE: if VALUE is true, PATHPART is a POSIX regular
         *     expression instead of a substring/glob pattern;
         *   - "-rexopt", STRING: 'e' in STRING selects REG_EXTENDED, 'i'
         *     selects REG_ICASE;
         *   - the first other string is the database file name.
         *
         * Result: undef if the database cannot be opened.  In list context
         * without a callback, the matching paths.  Otherwise a true/false
         * value telling whether anything matched; without a callback the
         * scan stops at the first match.
         *
         * A file that does not start with LOCATEDB_MAGIC is read as the old
         * bigram-encoded format.  If an entry's shared-prefix count is
         * inconsistent with the previous entry, a warning is issued and the
         * scan stops.
         *
         * Known limitation: if the callback dies, the buffers and the open
         * file are not released.
         */
        char *dbfile = NULL;
        SV   *coderef = NULL;
        SV   *errsv = NULL; /* Pending croak message, raised after cleanup. */
        FILE *fp;           /* The pathname database.  */
        int c;              /* An input byte.  */
        int nread;          /* Number of bytes read from an entry.  */
        size_t got;         /* Bytes read while probing for the magic.  */
        bool globflag;      /* true if PATHPART contains globbing
                               metacharacters.  */
        char *patend;       /* The end of the last glob-free subpattern
                               in PATHPART.  */
        char *path = NULL;  /* The current input database entry.  */
        size_t pathsize;    /* Amount allocated for it.  */
        int pathlen = 0;    /* Number of characters currently in `path'.  */
        int count = 0;      /* The length of the prefix shared with
                               the previous database entry.  */
        bool prev_fast_match = false;    /* true if we found a fast match
                                            (of patend) on the previous
                                            path.  */
        int printed = 0;                 /* Number of matches found.  */
        int pushed = 0;                  /* Number of results actually put
                                            on the Perl stack.  */
        bool old_format = false;         /* true if reading a bigram-encoded
                                            database.  */
        char bigram1[128], bigram2[128]; /* For the old database format, the
                                            first and second characters of
                                            the most common bigrams.  */
        /* regex stuff */
        int REGEX = 0;
        int NOCASE = 0;
        int EXTENDED = 0;
        int reg_res;
        regex_t *preg = NULL;
        char errbuf[1024];
        STRLEN n_a;
        int i;
    PPCODE:

        for (i = 1; i < items; i++) {
            char *key;

            if (SvROK(ST(i)) && SvTYPE((SV*)SvRV(ST(i))) == SVt_PVCV) {
                /* Mortal copy, so it is released by the caller instead of
                   leaking one SV per call.  */
                coderef = sv_2mortal(newSVsv(ST(i)));
                continue;
            }

            key = SvPV(ST(i), n_a);
            if (*key == '-') {
                if (strnEQ(key+1, "rexopt", 6)) {
                    /* The value is the next argument; only touch that
                       stack slot if it exists.  */
                    if (++i < items) {
                        char *val = SvPV(ST(i), n_a);
                        if (strchr(val, 'e'))
                            EXTENDED = 1;
                        if (strchr(val, 'i'))
                            NOCASE = 1;
                    }
                    continue;
                }
                else if (strnEQ(key+1, "rex", 3)) {
                    if (++i < items)
                        REGEX = SvTRUE(ST(i)) ? 1 : 0;
                    continue;
                }
            }
            if (!dbfile) {
                /* Keep the name in a mortal SV: it stays valid for error
                   messages and needs no explicit free on any exit path.  */
                dbfile = SvPVX(sv_2mortal(newSVpv(key, 0)));
            }
        }

        if (!dbfile)
            croak("No database (shouldn't happen)");

        if ((fp = fopen (dbfile, "r")) == NULL)
            XSRETURN_UNDEF;

        pathsize = 1026;        /* Increased as necessary.  */
        New(0, path, pathsize, char);

        got = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
        if (got != sizeof (LOCATEDB_MAGIC) ||
            memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC))) {
            int b;
            /* Read the list of the most common bigrams in the database.  */
            fseek (fp, 0, SEEK_SET);
            for (b = 0; b < 128; b++) {
                bigram1[b] = getc (fp);
                bigram2[b] = getc (fp);
            }
            old_format = true;
        }

        globflag = strchr (pathpart, '*') != NULL ||
                   strchr (pathpart, '?') != NULL ||
                   strchr (pathpart, '[') != NULL;

        patend = last_literal_end (pathpart);

        if (REGEX) {
            int flags = 0;
            if (EXTENDED)
                flags |= REG_EXTENDED;
            if (NOCASE)
                flags |= REG_ICASE;
            New(0, preg, 1, regex_t);
            if ((reg_res = regcomp(preg, pathpart, flags)) != 0) {
                regerror(reg_res, preg, errbuf, sizeof errbuf);
                /* A pattern that failed to compile must not be passed to
                   regfree; just release the storage.  */
                Safefree(preg);
                preg = NULL;
                errsv = sv_2mortal(newSVpvf("Invalid regular expression: %s\n",
                                            errbuf));
                goto clean_up;
            }
        }

        c = getc (fp);

        while (c != EOF) {
            int raw;            /* Encoded change of the prefix length.  */
            int bias;           /* Value that encodes "no change".  */
            bool matched = false;

            if (old_format) {
                bias = LOCATEDB_OLD_OFFSET;
                raw = (c == LOCATEDB_OLD_ESCAPE) ? getw (fp) : c;
            }
            else {
                bias = 0;
                if (c == LOCATEDB_ESCAPE)
                    raw = get_short (fp);
                else if (c > 127)
                    raw = c - 256;
                else
                    raw = c;
            }

            /* The shared prefix can be neither negative nor longer than
               the previous path.  Anything else would place the write
               position outside the data in `path'.  The test is arranged
               so that no intermediate value can overflow.  */
            if (raw < bias - count || raw > bias + (pathlen - count)) {
                warn("Error in dbfile '%s' (maybe corrupted?)\n", dbfile);
                break;
            }
            count += raw - bias;

            if (old_format) {
                size_t pos = (size_t) count;

                /* Overlay the old path with the remainder of the new.  */
                while ((c = getc (fp)) > LOCATEDB_OLD_ESCAPE) {
                    /* Room for up to two characters plus the NUL.  */
                    if (pos + 3 > pathsize) {
                        pathsize *= 2;
                        Renew(path, pathsize, char);
                    }
                    if (c < 0200)
                        path[pos++] = (char) c;  /* An ordinary character.  */
                    else {
                        /* Bigram markers have the high bit set. */
                        c &= 0177;
                        path[pos++] = bigram1[c];
                        path[pos++] = bigram2[c];
                    }
                }
                path[pos] = '\0';
                pathlen = (int) pos;
            }
            else {
                /* Overlay the old path with the remainder of the new.  */
                nread = getstr (&path, &pathsize, fp, '\0', count);
                if (nread < 0)
                    break;
                c = getc (fp);
                pathlen = count + nread;
                /* getstr stores the entry's NUL terminator as data; it is
                   not part of the path.  */
                if (nread > 0 && path[pathlen - 1] == '\0')
                    pathlen--;
            }

            if (REGEX) {
                if (regexec(preg, path, 0, NULL, 0) == 0)
                    matched = true;
            }
            else {
                /* If the previous path matched, scan the whole path for the
                   last char in the subpattern.  If not, the shared prefix
                   doesn't match the pattern, so don't scan it for the last
                   char.  */
                long stop = prev_fast_match ? 0 : (long) count;
                long k;

                /* Search backward starting at the end of the path we just
                   read in, for the character at the end of the last
                   glob-free subpattern in PATHPART.  */
                prev_fast_match = false;
                for (k = (long) pathlen - 1; k >= stop; k--) {
                    /* Fast first char check. */
                    if (path[k] == *patend) {
                        const char *p2 = patend - 1;    /* Scan `patend'.  */
                        long j = k - 1;                 /* Scan `path'.  */

                        /* The subpattern is preceded by a NUL sentinel;
                           `j >= 0' keeps the comparison from reading
                           before the start of `path'.  */
                        while (*p2 != '\0' && j >= 0 && path[j] == *p2) {
                            j--;
                            p2--;
                        }
                        if (*p2 == '\0') {
                            /* Success on the fast match.  Compare the whole
                               pattern if it contains globbing characters.  */
                            prev_fast_match = true;
                            if (!globflag ||
                                fnmatch (pathpart, path, 0) == 0)
                                matched = true;
                            break;
                        }
                    }
                }
            }

            if (matched) {
                printed++;
                if (coderef) {
                    call_coderef(coderef, path);
                }
                else if (GIMME_V == G_ARRAY) {
                    XPUSHs(sv_2mortal(newSVpvn(path, strlen(path))));
                    pushed++;
                }
                else {
                    /* Scalar or void context: one match is enough.  */
                    break;
                }
            }
        }
 clean_up:
        if (preg) {
            regfree(preg);
            Safefree(preg);
        }

        Safefree(path);

        fclose(fp);

        /* Raise a deferred error only now that everything is released.  */
        if (errsv)
            croak("%s", SvPVX(errsv));

        /* Return exactly what was pushed; with a callback nothing is on
           the stack even though matches were counted.  */
        if (GIMME_V == G_ARRAY)
            XSRETURN(pushed);
        else if (printed && GIMME_V == G_SCALAR)
            XSRETURN_YES;

        XSRETURN_NO;
472 
void
_slocate (str, ...)
        char *str;
    PREINIT:
        /* Search an slocate database for entries matching STR.
         *
         * Arguments after STR, in any order:
         *   - a code reference: called once per match with $_ set to the
         *     matching path (see call_coderef);
         *   - "-rex", VALUE: if VALUE is true, STR is a POSIX regular
         *     expression; otherwise STR is matched with fnmatch();
         *   - "-rexopt", STRING: 'e' in STRING selects REG_EXTENDED, 'i'
         *     selects REG_ICASE;
         *   - the first other string is the database file name.
         *
         * The first byte of the database is its security level.  At level
         * '1' a match is reported only if the process uid is 0 or
         * check_path_access() accepts the path.
         *
         * Result: in list context without a callback, the matching paths.
         * Otherwise a true/false value telling whether anything matched;
         * without a callback the scan stops at the first match.  Croaks if
         * the database cannot be opened or read, is a directory, or yields
         * a negative path offset.
         *
         * Known limitation: if the callback dies, the buffers and the file
         * descriptor are not released.
         */
        char *dbfile = NULL;
        SV *coderef = NULL;
        SV *errsv = NULL;       /* Pending croak message, raised after
                                   cleanup.  */
        int fd;
        struct stat statres;
        regex_t *preg = NULL;
        char errbuf[1024];
        int reg_res;
        long got;               /* Result of read().  */

        char *bucket_of_holding = NULL; /* Raw database bytes.  */
        size_t tot_size = 0;            /* Bytes allocated for it.  */
        size_t cur_size = 0;            /* Valid bytes in it.  */
        size_t begin_off = 0;           /* Offset of the next undecoded
                                           entry.  */
        int at_eof = 0;                 /* Set once read() returned 0.  */

        char *codedpath = NULL;         /* The current decoded path.  */
        size_t code_tot_size = 0;       /* Bytes allocated for it.  */
        long code_off = 0;              /* Offset in `codedpath' where the
                                           next entry's text goes.  */
        int first = 1;

        STRLEN n_a;
        /* these vars were global in slocate/main.c */
        int REGEX = 0;
        int NOCASE  = 0;
        int EXTENDED = 0;

        char slevel = '1';
        int res = 0;            /* Number of matches found.  */
        int pushed = 0;         /* Number of results actually put on the
                                   Perl stack.  */
        int i;
    PPCODE:
    {
        for (i = 1; i < items; i++) {
            char *key;

            if (SvROK(ST(i)) && SvTYPE((SV*)SvRV(ST(i))) == SVt_PVCV) {
                /* Mortal copy, so it is released by the caller instead of
                   leaking one SV per call.  */
                coderef = sv_2mortal(newSVsv(ST(i)));
                continue;
            }

            key = SvPV(ST(i), n_a);
            if (*key == '-') {
                if (strnEQ(key+1, "rexopt", 6)) {
                    /* The value is the next argument; only touch that
                       stack slot if it exists.  */
                    if (++i < items) {
                        char *val = SvPV(ST(i), n_a);
                        if (strchr(val, 'e'))
                            EXTENDED = 1;
                        if (strchr(val, 'i'))
                            NOCASE = 1;
                    }
                    continue;
                }
                else if (strnEQ(key+1, "rex", 3)) {
                    if (++i < items)
                        REGEX = SvTRUE(ST(i)) ? 1 : 0;
                    continue;
                }
            }
            if (!dbfile) {
                /* Keep the name in a mortal SV: it stays valid for error
                   messages and needs no explicit free on any exit path.  */
                dbfile = SvPVX(sv_2mortal(newSVpv(key, 0)));
            }
        }

        if (!dbfile)
            croak("No database (shouldn't happen)");

        if ((fd = open(dbfile,O_RDONLY)) == -1) {
            croak("Can't open dbfile '%s': %s\n", dbfile, strerror(errno));
        }

        /* Examine the descriptor that was actually opened, and only trust
           the result if the call succeeded.  */
        if (fstat(fd, &statres) == 0 && S_ISDIR(statres.st_mode)) {
            errsv = sv_2mortal(newSVpvf("Database '%s' is a directory\n",
                                        dbfile));
            goto clean_up;
        }

        {
            char level_byte;

            got = (long) read(fd, &level_byte, 1);
            if (got == -1) {
                errsv = sv_2mortal(newSVpvf(
                            "Error reading from database: %s\n",
                            strerror(errno)));
                goto clean_up;
            }
            /* An empty file keeps the default level.  */
            if (got == 1)
                slevel = level_byte;
        }

        if (REGEX) {
            int flags = 0;
            /* Honour 'e' from -rexopt, as _locate does.  */
            if (EXTENDED)
                flags |= REG_EXTENDED;
            if (NOCASE)
                flags |= REG_ICASE;
            New(0, preg, 1, regex_t);
            if ((reg_res = regcomp(preg, str, flags)) != 0) {
                regerror(reg_res, preg, errbuf, sizeof errbuf);
                /* A pattern that failed to compile must not be passed to
                   regfree; just release the storage.  */
                Safefree(preg);
                preg = NULL;
                errsv = sv_2mortal(newSVpvf("Invalid regular expression: %s\n",
                                            errbuf));
                goto clean_up;
            }
        }

        /* Zero-filled so that a corrupt offset can never expose
           uninitialised memory as path text.  */
        code_tot_size = MIN_BLK;
        Newz(0, codedpath, code_tot_size, char);

        tot_size = MIN_BLK;
        New(0, bucket_of_holding, tot_size, char);

        for (;;) {
            size_t hdr;         /* Bytes used by the offset encoding.  */
            char *nul = NULL;   /* Terminator of this entry's text.  */
            char *cursor;
            int code_num;
            size_t text_len;
            size_t need;
            int matched;
            int printit = 0;

            /* Read until one complete entry (offset bytes + text + NUL)
               is buffered, so that decoding never runs past the data.  */
            for (;;) {
                size_t avail = cur_size - begin_off;

                hdr = 1;
                if (avail >= 1) {
                    int lead = (unsigned char) bucket_of_holding[begin_off];
                    if (lead > 127)
                        lead -= 256;
                    if (lead == SLOC_ESC)
                        hdr = 3;    /* Escape byte + two-byte offset.  */
                }
                if (avail >= hdr)
                    nul = (char *) memchr(bucket_of_holding + begin_off + hdr,
                                          '\0', avail - hdr);
                if (nul != NULL || at_eof)
                    break;

                /* Drop the entries already decoded, then append more.  */
                if (begin_off > 0) {
                    memmove(bucket_of_holding,
                            bucket_of_holding + begin_off, avail);
                    cur_size = avail;
                    begin_off = 0;
                }
                if (cur_size + MIN_BLK > tot_size) {
                    while (cur_size + MIN_BLK > tot_size)
                        tot_size <<= 1;
                    Renew(bucket_of_holding, tot_size, char);
                }

                got = (long) read(fd, bucket_of_holding + cur_size, MIN_BLK);
                if (got == -1) {
                    errsv = sv_2mortal(newSVpvf(
                                "Error reading from database: %s\n",
                                strerror(errno)));
                    goto clean_up;
                }
                if (got == 0)
                    at_eof = 1;
                else
                    cur_size += (size_t) got;
            }

            /* End of data, or a truncated final entry.  */
            if (nul == NULL)
                break;

            /* Decode the offset as a signed byte regardless of whether
               plain char is signed on this platform.  */
            code_num = (unsigned char) bucket_of_holding[begin_off];
            if (code_num > 127)
                code_num -= 256;
            cursor = bucket_of_holding + begin_off + 1;
            if (code_num == SLOC_ESC)
                code_num = (short) s_get_short(&cursor);

            text_len = (size_t) (nul - cursor);
            begin_off = (size_t) (nul - bucket_of_holding) + 1;

            code_off += code_num;
            if (code_off < 0) {
                errsv = sv_2mortal(newSVpvf(
                            "Error in dbfile '%s' (maybe corrupted?)\n",
                            dbfile));
                goto clean_up;
            }

            /* Overlay the old path with the text of the new entry.  */
            need = (size_t) code_off + text_len + 1;
            if (need > code_tot_size) {
                size_t old_size = code_tot_size;

                code_tot_size = need * 2;
                Renew(codedpath, code_tot_size, char);
                Zero(codedpath + old_size, code_tot_size - old_size, char);
            }
            memcpy(codedpath + code_off, cursor, text_len + 1);

            /* After the first entry the running offset is advanced by the
               length of that entry.  */
            if (first) {
                code_off += (long) strlen(codedpath);
                first = 0;
            }

            if (REGEX)
                matched = (regexec(preg, codedpath, 0, NULL, 0) == 0);
            else
                matched = (fnmatch(str, codedpath, 0) == 0);

            if (matched) {
                /* check_path_access frees the copy it is given.  */
                if (slevel != '1' || UID == 0 ||
                    check_path_access(savepv(codedpath)))
                    printit = 1;
            }

            if (printit) {
                res++;
                if (coderef)
                    call_coderef(coderef, codedpath);
                else if (GIMME_V == G_ARRAY) {
                    XPUSHs(sv_2mortal(newSVpvn(codedpath, strlen(codedpath))));
                    pushed++;
                }
                else {
                    /* Scalar or void context: one match is enough.  */
                    break;
                }
            }
        }
 clean_up:

        close(fd);
        if (preg) {
            regfree(preg);
            Safefree(preg);
        }

        Safefree(bucket_of_holding);
        Safefree(codedpath);

        /* Raise a deferred error only now that everything is released.  */
        if (errsv)
            croak("%s", SvPVX(errsv));

        /* Return exactly what was pushed; with a callback nothing is on
           the stack even though matches were counted.  */
        if (GIMME_V == G_ARRAY)
            XSRETURN(pushed);
        else {
            if (res > 0)
                XSRETURN_YES;
            XSRETURN_NO;
        }
    }
728