1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4
5 #include "ppport.h"
6
7 #include "const-c.inc"
8
9 #include "locatedb.h"
10
11 #include <config.h>
12 #include <stdio.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <time.h>
16 #include <fnmatch.h>
17 #include <regex.h>
18 #include <unistd.h>
19 #include <fcntl.h>
20
21 #define NDEBUG
22 #include <assert.h>
23
24 #ifdef STDC_HEADERS
25 #include <stdlib.h>
26 #else
27 char *getenv ();
28 #endif
29
30 #ifdef STDC_HEADERS
31 #include <errno.h>
32 #include <stdlib.h>
33 #else
34 extern int errno;
35 #endif
36
37
38 #define WARNING 0
39 #define MIN_CHUNK 64
40 #define MIN_BLK 4096
41 #define ALLOC_SIZE 4096
42 #define SLOC_ESC -0x80
43
44 #ifndef call_sv
45 # define call_sv perl_call_sv
46 #endif
47
48 uid_t UID;
49 gid_t GID;
50
51 // typedef enum {false, true} boolean;
52 #include <stdbool.h>
53
last_literal_end(char * name)54 static char * last_literal_end (char *name) {
55 static char *globfree = NULL; /* A copy of the subpattern in NAME. */
56 static size_t gfalloc = 0; /* Bytes allocated for `globfree'. */
57 register char *subp; /* Return value. */
58 register char *p; /* Search location in NAME. */
59
60 /* Find the end of the subpattern.
61 Skip trailing metacharacters and [] ranges. */
62 for (p = name + strlen (name) - 1;
63 p >= name && strchr ("*?]", *p) != NULL;
64 p--) {
65
66 if (*p == ']')
67 while (p >= name && *p != '[')
68 p--;
69 }
70
71 if (p < name)
72 p = name;
73
74 if (p - name + 3 > gfalloc) {
75 gfalloc = p - name + 3 + 64; /* Room to grow. */
76 Renew(globfree, gfalloc, char);
77 }
78
79 subp = globfree;
80 *subp++ = '\0';
81
82 /* If the pattern has only metacharacters, make every path match the
83 subpattern, so it gets checked the slow way. */
84 if (p == name && strchr ("?*[]", *p) != NULL)
85 *subp++ = '/';
86 else {
87 char *endmark;
88 /* Find the start of the metacharacter-free subpattern. */
89 for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
90 ;
91 /* Copy the subpattern into globfree. */
92 for (++p; p <= endmark; )
93 *subp++ = *p++;
94 }
95
96 *subp-- = '\0'; /* Null terminate, though it's not needed. */
97
98 return subp;
99 }
100
getstr(char ** lineptr,size_t * n,FILE * stream,char terminator,int offset)101 int getstr (char **lineptr, size_t *n, FILE *stream,
102 char terminator, int offset) {
103 int nchars_avail; /* Allocated but unused chars in *LINEPTR. */
104 char *read_pos; /* Where we're reading into *LINEPTR. */
105 int ret;
106
107 if (!lineptr || !n || !stream)
108 return -1;
109
110 if (!*lineptr) {
111 *n = MIN_CHUNK;
112 New(0, *lineptr, *n, char);
113 if (!*lineptr)
114 return -1;
115 }
116
117 nchars_avail = *n - offset;
118 read_pos = *lineptr + offset;
119
120 for (;;) {
121 register int c = getc (stream);
122
123 /* We always want at least one char left in the buffer, since we
124 always (unless we get an error while reading the first char)
125 NULL-terminate the line buffer. */
126
127 assert(*n - nchars_avail == read_pos - *lineptr);
128 if (nchars_avail < 1) {
129 if (*n > MIN_CHUNK)
130 *n *= 2;
131 else
132 *n += MIN_CHUNK;
133
134 nchars_avail = *n + *lineptr - read_pos;
135 Renew(*lineptr, *n, char);
136 if (!*lineptr)
137 return -1;
138 read_pos = *n - nchars_avail + *lineptr;
139 assert(*n - nchars_avail == read_pos - *lineptr);
140 }
141
142 if (c == EOF || ferror (stream)) {
143 /* Return partial line, if any. */
144 if (read_pos == *lineptr)
145 return -1;
146 else
147 break;
148 }
149
150 *read_pos++ = c;
151 nchars_avail--;
152
153 if (c == terminator)
154 /* Return the line. */
155 break;
156 }
157
158 /* Done - NUL terminate and return the number of chars read. */
159 *read_pos = '\0';
160
161 ret = read_pos - (*lineptr + offset);
162 return ret;
163 }
164
/* Read a big-endian, two's-complement 16-bit integer from FP and return
   it as an int in the range -32768..32767.  Returns 0 if two bytes
   could not be read (EOF or error), so a truncated file contributes no
   offset instead of an indeterminate one.  */
static int get_short (FILE *fp) {
  unsigned char buf[2];
  int value;

  if (fread (buf, 1, sizeof buf, fp) != sizeof buf)
    return 0;

  /* Assemble from unsigned bytes so the result does not depend on the
     signedness of plain char, then sign-extend by hand.  */
  value = (buf[0] << 8) | buf[1];
  if (value >= 0x8000)
    value -= 0x10000;

  return value;
}
170
/* Decode a big-endian, two's-complement 16-bit integer from the two
   bytes at *FP, advance *FP past them, and return the value as an int
   in the range -32768..32767.  The caller must guarantee that two
   bytes are readable at *FP.  */
static int s_get_short (char **fp) {
  const unsigned char *bytes = (const unsigned char *) *fp;
  int value;

  /* Work on unsigned bytes: shifting a negative char is undefined and
     the result would otherwise depend on the signedness of char.  */
  value = (bytes[0] << 8) | bytes[1];
  if (value >= 0x8000)
    value -= 0x10000;

  /* move pointer two bytes ahead */
  *fp += 2;

  return value;
}
178
check_path_access(char * codedpath)179 int check_path_access(char *codedpath) {
180 char *dir = NULL;
181 char *path = NULL;
182 int res;
183 char *str_ptr;
184
185 if (access(codedpath, R_OK) != 0) {
186 Safefree(codedpath);
187 return 0;
188 }
189
190 New(0, path, strlen(codedpath)+1, char);
191 *path = 0;
192
193 res = 1;
194 str_ptr = codedpath;
195
196 while ((dir = strtok(str_ptr, "/"))) {
197 strcat(path,"/");
198 strcat(path,dir);
199 if (access(path, R_OK) != 0) {
200 res = 0;
201 break;
202 }
203 str_ptr = NULL;
204 }
205
206 Safefree(codedpath);
207
208 Safefree(path);
209
210 return res;
211 }
212
/* Invoke the Perl subroutine referenced by CODEREF with no arguments
   and with $_ temporarily set to a copy of PATH.  Any return value is
   discarded (G_DISCARD).  The call is not wrapped in G_EVAL, so an
   exception raised by the callback propagates to the caller.  */
void call_coderef (SV *coderef, char *path) {
  dSP;

  ENTER;
  SAVETMPS;

  /* Localize $_ INSIDE the ENTER/LEAVE pair so that LEAVE below puts
     the previous value back.  Saving it before ENTER would defer the
     restore to some outer scope and leave $_ pointing at the mortal
     freed by FREETMPS.  */
  /* FIXME We aren't yet prepared for lexical $_ as coming in 5.9.1 */
  SAVESPTR(DEFSV);
  DEFSV = sv_2mortal(newSVpvn(path, strlen(path)));

  PUSHMARK(SP);
  PUTBACK;
  (void) call_sv(coderef, G_DISCARD);

  FREETMPS;
  LEAVE;
}
229 #define WARN fprintf(stderr, "%i\n", __LINE__);
230
231 MODULE = File::Locate PACKAGE = File::Locate
232
233 INCLUDE: const-xs.inc
234
BOOT:
{
    /* Runs once when the module is bootstrapped: cache the real user
       and group ids of the process in the file-level UID/GID globals. */
    UID = getuid();
    GID = getgid();
}
240
void
_locate (pathpart, ...)
    char *pathpart;
  PROTOTYPE: DISABLE
  PREINIT:
    /* _locate(PATHPART, ARGS...)
     *
     * Search a GNU locate database for entries matching PATHPART.
     * The remaining arguments are scanned in order:
     *   - a CODE reference: called once per match with $_ set to the path;
     *   - "-rexopt" VALUE : 'e' selects REG_EXTENDED, 'i' REG_ICASE;
     *   - "-rex" VALUE    : true selects regex matching instead of glob;
     *   - anything else   : the first such argument is the database file.
     * Option names are compared by prefix only.  An option given as the
     * last argument (without a value) is ignored.
     *
     * Both the new format (starts with LOCATEDB_MAGIC) and the old
     * bigram-encoded format are understood.
     *
     * Return value:
     *   - undef if the database cannot be opened;
     *   - list context, no coderef: the matching paths;
     *   - list context with a coderef: an empty list (matches were handed
     *     to the callback, nothing is pushed);
     *   - scalar context: true/false; without a coderef the scan stops at
     *     the first match.
     * Croaks on a missing database argument or an invalid regex.
     * A corrupt entry stops the scan with a warning.
     *
     * NOTE(review): if the callback dies, the open FILE and the path
     * buffer are not released.
     */
    char *dbfile = NULL;
    SV *coderef = NULL;
    FILE *fp;			/* The pathname database.  */
    int c;			/* An input byte.  */
    int nread;			/* Number of bytes read from an entry.  */
    bool globflag;		/* true if PATHPART contains globbing
                                   metacharacters.  */
    char *patend;		/* The end of the last glob-free subpattern
                                   in PATHPART.  */
    char *path;			/* The current input database entry.  */
    size_t pathsize;		/* Amount allocated for it.  */
    int count = 0;		/* The length of the prefix shared with
                                   the previous database entry.  */
    int pathlen = 0;		/* Length of the entry currently in `path'.  */
    int last;			/* Index of the last character of `path'.  */
    int cutoff;			/* Index in `path' at which to stop the
                                   backward search for the last character
                                   in the subpattern.  */
    bool prev_fast_match = false;	/* true if we found a fast match
                                           (of patend) on the previous
                                           path.  */
    int printed = 0;		/* Number of matching entries.  */
    int pushed = 0;		/* Number of SVs pushed on the Perl stack.  */
    bool old_format = false;	/* true if reading a bigram-encoded
                                   database.  */
    char bigram1[128], bigram2[128];	/* For the old database format, the
                                           first and second characters of
                                           the most common bigrams.  */
    /* regex stuff */
    int REGEX = 0;
    int NOCASE = 0;
    int EXTENDED = 0;
    int reg_res;
    int nmatch = 32;
    regex_t *preg = NULL;
    char errbuf[1024];
    regmatch_t pmatch[32];
    STRLEN n_a;
    int i;
  PPCODE:

    for (i = 1; i < items; i++) {
        if (SvROK(ST(i)) && SvTYPE((SV*)SvRV(ST(i))) == SVt_PVCV) {
            /* Mortal copy: released by the caller's FREETMPS even if we
               croak, instead of leaking one SV per call. */
            coderef = sv_2mortal(newSVsv(ST(i)));
        }
        else {
            char *key = SvPV(ST(i), n_a);
            if (*key == '-') {
                if (strnEQ(key+1, "rexopt", 6)) {
                    /* Only read the value if it was actually passed. */
                    if (i + 1 < items) {
                        char *val = SvPV(ST(i + 1), n_a);
                        if (strchr(val, (int)'e'))
                            EXTENDED = 1;
                        if (strchr(val, (int)'i'))
                            NOCASE = 1;
                    }
                    i++;
                    continue;
                }
                else if (strnEQ(key+1, "rex", 3)) {
                    if (i + 1 < items)
                        REGEX = SvTRUE(ST(i + 1));
                    i++;
                    continue;
                }
            }
            if (!dbfile) {
                /* Keep the name in a mortal SV so that every exit path,
                   including croak, releases it. */
                SV *dbcopy = sv_2mortal(newSVpv(key, 0));
                dbfile = SvPV(dbcopy, n_a);
            }
        }
    }

    if (!dbfile)
        croak("No database (shouldn't happen)");

    if ((fp = fopen (dbfile, "r")) == NULL)
        XSRETURN_UNDEF;

    pathsize = 1026;		/* Increased as necessary.  */
    New(0, path, pathsize, char);

    nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
    if (nread != sizeof (LOCATEDB_MAGIC) ||
        memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC))) {
        int b;
        /* Read the list of the most common bigrams in the database.  */
        fseek (fp, 0, SEEK_SET);
        for (b = 0; b < 128; b++) {
            bigram1[b] = getc (fp);
            bigram2[b] = getc (fp);
        }
        old_format = true;
    }

    globflag = strchr (pathpart, '*') ||
               strchr (pathpart, '?') ||
               strchr (pathpart, '[');

    patend = last_literal_end (pathpart);

    if (REGEX) {
        int flags = 0;
        if (EXTENDED)
            flags |= REG_EXTENDED;
        if (NOCASE)
            flags |= REG_ICASE;
        New(0, preg, 1, regex_t);
        if ((reg_res = regcomp(preg, pathpart, flags)) != 0) {
            regerror(reg_res, preg, errbuf, sizeof errbuf);
            /* Release everything before the non-local exit.  A regex_t
               whose compilation failed must not be passed to regfree. */
            Safefree(preg);
            Safefree(path);
            fclose(fp);
            croak("Invalid regular expression: %s\n", errbuf);
        }
    }

    c = getc (fp);

    while (c != EOF) {

        if (old_format) {
            /* Get the offset in the path where this path info starts.  */
            if (c == LOCATEDB_OLD_ESCAPE)
                count += getw (fp) - LOCATEDB_OLD_OFFSET;
            else
                count += c - LOCATEDB_OLD_OFFSET;

            /* The shared prefix cannot be longer than the previous path. */
            if (count < 0 || count > pathlen) {
                warn("Corrupt entry in database '%s'", dbfile);
                break;
            }

            /* Overlay the old path with the remainder of the new.  */
            pathlen = count;
            for (;;) {
                /* Room for up to two more characters and the NUL. */
                if ((size_t)pathlen + 3 > pathsize) {
                    pathsize *= 2;
                    Renew(path, pathsize, char);
                }
                c = getc (fp);
                if (c <= LOCATEDB_OLD_ESCAPE)
                    break;	/* Next entry's count byte, or EOF. */
                if (c < 0200)
                    path[pathlen++] = c;	/* An ordinary character.  */
                else {
                    /* Bigram markers have the high bit set.  */
                    c &= 0177;
                    path[pathlen++] = bigram1[c];
                    path[pathlen++] = bigram2[c];
                }
            }
            path[pathlen] = '\0';
        }
        else {
            if (c == LOCATEDB_ESCAPE)
                count += get_short (fp);
            else if (c > 127)
                count += c - 256;
            else
                count += c;

            /* The shared prefix cannot be longer than the previous path. */
            if (count < 0 || count > pathlen) {
                warn("Corrupt entry in database '%s'", dbfile);
                break;
            }

            /* Overlay the old path with the remainder of the new.  */
            nread = getstr (&path, &pathsize, fp, '\0', count);
            if (nread < 0)
                break;
            c = getc (fp);
            pathlen = (int) strlen (path);
        }

        /* Index of the last char in path (-1 for an empty path).  */
        last = pathlen - 1;

        /* If the previous path matched, scan the whole path for the last
           char in the subpattern.  If not, the shared prefix doesn't match
           the pattern, so don't scan it for the last char.  */
        cutoff = prev_fast_match ? 0 : count;

        if (REGEX) {
            if (regexec(preg, path, nmatch, pmatch, 0) == 0) {
                ++printed;
                if (coderef) {
                    call_coderef(coderef, path);
                }
                else if (GIMME_V == G_ARRAY) {
                    XPUSHs(sv_2mortal(newSVpvn(path, strlen(path))));
                    pushed++;
                }
                else {
                    goto clean_up;
                }
            }
        }
        else {
            int si;		/* Scan the path we read in.  */

            /* Search backward starting at the end of the path we just read
               in, for the character at the end of the last glob-free
               subpattern in PATHPART.  */
            for (prev_fast_match = false, si = last; si >= cutoff; si--) {
                /* Fast first char check.  */
                if (path[si] == *patend) {
                    int s2 = si - 1;		/* Scan `path'.  */
                    char *p2 = patend - 1;	/* Scan `patend'.  */

                    /* The subpattern buffer starts with a NUL, which ends
                       the comparison; never step before path[0]. */
                    while (*p2 != '\0' && s2 >= 0 && path[s2] == *p2) {
                        s2--;
                        p2--;
                    }
                    if (*p2 == '\0') {
                        /* Success on the fast match.  Compare the whole
                           pattern if it contains globbing characters.  */
                        prev_fast_match = true;

                        if (globflag == false ||
                            fnmatch (pathpart, path, 0) == 0) {
                            printed++;
                            if (coderef) {
                                call_coderef(coderef, path);
                            }
                            else if (GIMME_V == G_ARRAY) {
                                XPUSHs(sv_2mortal(newSVpvn(path, strlen(path))));
                                pushed++;
                            }
                            else {
                                goto clean_up;
                            }
                        }
                        break;
                    }
                }
            }
        } /* else (fnmatch)*/
    }
  clean_up:
    if (preg) {
        regfree(preg);
        Safefree(preg);
    }

    Safefree(path);

    fclose(fp);

    /* Return only what was really pushed: with a coderef nothing is on
       the stack even though matches were counted. */
    if (GIMME_V == G_ARRAY)
        XSRETURN(pushed);
    else if (printed && GIMME_V == G_SCALAR)
        XSRETURN_YES;

    XSRETURN_NO;
472
void
_slocate (str, ...)
    char *str;
  PREINIT:
    /* _slocate(STR, ARGS...)
     *
     * Search an slocate database for entries matching STR, which is a
     * glob (fnmatch, whole path) or, with "-rex", a POSIX regex.
     * The remaining arguments are scanned in order:
     *   - a CODE reference: called once per match with $_ set to the path;
     *   - "-rexopt" VALUE : 'e' selects REG_EXTENDED, 'i' REG_ICASE;
     *   - "-rex" VALUE    : true selects regex matching;
     *   - anything else   : the first such argument is the database file.
     * Option names are compared by prefix only.  An option given as the
     * last argument (without a value) is ignored.
     *
     * The first byte of the database is its security level.  When it is
     * '1' and the cached UID is not 0, a match is reported only if
     * check_path_access() accepts the path.
     *
     * Return value:
     *   - list context, no coderef: the matching paths;
     *   - list context with a coderef: an empty list;
     *   - otherwise true/false; without a coderef the scan stops at the
     *     first match.
     * Croaks if the database is missing, unreadable, a directory or
     * corrupt, or if the regex is invalid.
     *
     * NOTE(review): if the callback dies, the file descriptor and the
     * buffers are not released.
     */
    char *dbfile = NULL;
    SV *coderef = NULL;
    int fd;
    struct stat statres;
    char slevel = '1';
    regex_t *preg = NULL;
    char errbuf[1024];
    int nmatch = 32;
    regmatch_t pmatch[32];
    int reg_res;
    int bytes;
    char *bucket_of_holding = NULL;	/* Read-ahead buffer.  */
    int tot_size = MIN_BLK;		/* Bytes allocated for it.  */
    int cur_size = 0;			/* Bytes of valid data in it.  */
    int begin_offset = 0;		/* Next unconsumed byte in it.  */
    int at_eof = 0;
    char *codedpath = NULL;		/* The current decoded path.  */
    int code_tot_size = MIN_BLK;	/* Bytes allocated for it.  */
    int code_offset = 0;		/* Running prefix length.  */
    int first = 1;
    int res = 0;			/* Number of matching entries.  */
    int pushed = 0;			/* Number of SVs pushed.  */
    int failure = 0;			/* 1: read error, 2: corrupt db.  */
    int saved_errno = 0;
    /* these vars were global in slocate/main.c */
    int REGEX = 0;
    int NOCASE = 0;
    int EXTENDED = 0;
    STRLEN n_a;
    int i;
  PPCODE:
    {
      for (i = 1; i < items; i++) {
          if (SvROK(ST(i)) && SvTYPE((SV*)SvRV(ST(i))) == SVt_PVCV) {
              /* Mortal copy: released by the caller's FREETMPS even if
                 we croak, instead of leaking one SV per call. */
              coderef = sv_2mortal(newSVsv(ST(i)));
          }
          else {
              char *key = SvPV(ST(i), n_a);
              if (*key == '-') {
                  if (strnEQ(key+1, "rexopt", 6)) {
                      /* Only read the value if it was actually passed. */
                      if (i + 1 < items) {
                          char *val = SvPV(ST(i + 1), n_a);
                          if (strchr(val, (int)'e'))
                              EXTENDED = 1;
                          if (strchr(val, (int)'i'))
                              NOCASE = 1;
                      }
                      i++;
                      continue;
                  }
                  else if (strnEQ(key+1, "rex", 3)) {
                      if (i + 1 < items)
                          REGEX = SvTRUE(ST(i + 1));
                      i++;
                      continue;
                  }
              }
              if (!dbfile) {
                  /* Keep the name in a mortal SV so that it stays valid
                     for error messages and is released on every exit. */
                  SV *dbcopy = sv_2mortal(newSVpv(key, 0));
                  dbfile = SvPV(dbcopy, n_a);
              }
          }
      }

      if (!dbfile)
          croak("No database (shouldn't happen)");

      if ((fd = open(dbfile, O_RDONLY)) == -1) {
          croak("Can't open dbfile '%s': %s\n", dbfile, strerror(errno));
      }

      /* Examine the descriptor we actually opened. */
      if (fstat(fd, &statres) == 0 && S_ISDIR(statres.st_mode)) {
          close(fd);
          croak("Database '%s' is a directory\n", dbfile);
      }

      /* First byte: security level. */
      bytes = read(fd, &slevel, 1);
      if (bytes == -1) {
          saved_errno = errno;
          failure = 1;
          goto clean_up;
      }
      if (bytes == 0)
          goto clean_up;	/* Empty database: nothing can match. */

      if (REGEX) {
          int flags = 0;
          if (EXTENDED)
              flags |= REG_EXTENDED;
          if (NOCASE)
              flags |= REG_ICASE;
          New(0, preg, 1, regex_t);
          if ((reg_res = regcomp(preg, str, flags)) != 0) {
              regerror(reg_res, preg, errbuf, sizeof errbuf);
              /* A regex_t whose compilation failed must not be passed
                 to regfree. */
              Safefree(preg);
              close(fd);
              croak("Invalid regular expression: %s\n", errbuf);
          }
      }

      New(0, codedpath, code_tot_size, char);
      *codedpath = 0;

      New(0, bucket_of_holding, tot_size, char);

      for (;;) {
          char *entry_end = NULL;	/* NUL ending the next entry.  */
          int hdr_len = 1;		/* Bytes of count information.  */
          int suffix_len;		/* Path bytes incl. the NUL.  */
          short code_num;
          int printit = 0;
          int matched;

          /* Buffer input until one complete entry (count byte(s), path
             bytes, NUL) is available, so that decoding below never reads
             past the valid data.  */
          for (;;) {
              int avail = cur_size - begin_offset;

              hdr_len = 1;
              if (avail > 0 &&
                  (signed char)bucket_of_holding[begin_offset] == SLOC_ESC)
                  hdr_len = 3;	/* Escape byte + 16-bit count. */

              if (avail > hdr_len)
                  entry_end = (char *) memchr(
                      bucket_of_holding + begin_offset + hdr_len, '\0',
                      (size_t)(avail - hdr_len));

              if (entry_end || at_eof)
                  break;

              /* Drop the bytes already consumed so the buffer does not
                 grow with the size of the database. */
              if (begin_offset > 0) {
                  memmove(bucket_of_holding,
                          bucket_of_holding + begin_offset, (size_t)avail);
                  cur_size = avail;
                  begin_offset = 0;
              }

              if (cur_size + MIN_BLK > tot_size) {
                  while (cur_size + MIN_BLK > tot_size)
                      tot_size <<= 1;
                  Renew(bucket_of_holding, tot_size, char);
              }

              /* No 1 byte reads! */
              bytes = read(fd, bucket_of_holding + cur_size, MIN_BLK - 1);
              if (bytes == -1) {
                  saved_errno = errno;
                  failure = 1;
                  goto clean_up;
              }
              if (bytes == 0)
                  at_eof = 1;
              else
                  cur_size += bytes;
          }

          if (!entry_end) {
              /* End of data.  Left-over bytes are a truncated entry. */
              if (cur_size > begin_offset)
                  failure = 2;
              break;
          }

          /* Count byte, read as signed regardless of char signedness. */
          code_num = (signed char)bucket_of_holding[begin_offset];
          begin_offset += 1;

          if (code_num == SLOC_ESC) {
              char *p = bucket_of_holding + begin_offset;
              code_num = s_get_short(&p);
              begin_offset += 2;
          }

          code_offset += code_num;

          if (code_offset < 0) {
              failure = 2;
              goto clean_up;
          }

          /* Overlay the tail of the previous path with the new bytes. */
          suffix_len = (int)(entry_end - (bucket_of_holding + begin_offset)) + 1;
          if (code_offset + suffix_len > code_tot_size) {
              code_tot_size = (code_offset + suffix_len) * 2;
              Renew(codedpath, code_tot_size, char);
          }
          memcpy(codedpath + code_offset,
                 bucket_of_holding + begin_offset, (size_t)suffix_len);
          begin_offset += suffix_len;

          /* After the first entry the running offset is advanced by the
             length of that entry (kept exactly as in the original
             decoder). */
          if (first) {
              code_offset += (int) strlen(codedpath);
              first = 0;
          }

          if (REGEX)
              matched = (regexec(preg, codedpath, nmatch, pmatch, 0) == 0);
          else
              matched = (fnmatch(str, codedpath, 0) == 0);

          if (matched) {
              if (slevel == '1') {
                  /* check_path_access frees the copy it is given. */
                  if (UID == 0 || check_path_access(savepv(codedpath)))
                      printit = 1;
              } else
                  printit = 1;
          }

          if (printit) {
              res++;
              if (coderef)
                  call_coderef(coderef, codedpath);
              else if (GIMME_V == G_ARRAY) {
                  XPUSHs(sv_2mortal(newSVpvn(codedpath, strlen(codedpath))));
                  pushed++;
              }
              else {
                  goto clean_up;
              }
          }
      }
    clean_up:

      close(fd);
      if (preg) {
          regfree(preg);
          Safefree(preg);
      }

      Safefree(bucket_of_holding);
      Safefree(codedpath);

      /* Report errors only after everything has been released. */
      if (failure == 1)
          croak("Error reading from database: %s\n", strerror(saved_errno));
      if (failure == 2)
          croak("Error in dbfile '%s' (maybe corrupted?)\n", dbfile);

      /* Return only what was really pushed: with a coderef nothing is
         on the stack even though matches were counted. */
      if (GIMME_V == G_ARRAY)
          XSRETURN(pushed);
      else {
          if (res > 0)
              XSRETURN_YES;
          XSRETURN_NO;
      }
    }

728