1 #define PERL_NO_GET_CONTEXT
2 
3 #include "EXTERN.h"
4 #include "perl.h"
5 #include "XSUB.h"
6 
7 #include "bsd_glob.h"
8 
9 #define MY_CXT_KEY "File::Glob::_guts" XS_VERSION
10 
11 typedef struct {
12 #ifdef USE_ITHREADS
13     tTHX interp;
14 #endif
15     int		x_GLOB_ERROR;
16     HV *	x_GLOB_ENTRIES;
17     Perl_ophook_t	x_GLOB_OLD_OPHOOK;
18 } my_cxt_t;
19 
20 START_MY_CXT
21 
22 #define GLOB_ERROR	(MY_CXT.x_GLOB_ERROR)
23 
24 #include "const-c.inc"
25 
26 #ifdef WIN32
27 #define errfunc		NULL
28 #else
29 static int
30 errfunc(const char *foo, int bar) {
31   PERL_UNUSED_ARG(foo);
32   return !(bar == EACCES || bar == ENOENT || bar == ENOTDIR);
33 }
34 #endif
35 
36 static void
doglob(pTHX_ const char * pattern,int flags)37 doglob(pTHX_ const char *pattern, int flags)
38 {
39     dSP;
40     glob_t pglob;
41     int i;
42     int retval;
43     SV *tmp;
44     {
45 	dMY_CXT;
46 
47 	/* call glob */
48 	memset(&pglob, 0, sizeof(glob_t));
49 	retval = bsd_glob(pattern, flags, errfunc, &pglob);
50 	GLOB_ERROR = retval;
51 
52 	/* return any matches found */
53 	EXTEND(sp, pglob.gl_pathc);
54 	for (i = 0; i < pglob.gl_pathc; i++) {
55 	    /* printf("# bsd_glob: %s\n", pglob.gl_pathv[i]); */
56 	    tmp = newSVpvn_flags(pglob.gl_pathv[i], strlen(pglob.gl_pathv[i]),
57 				 SVs_TEMP);
58 	    TAINT;
59 	    SvTAINT(tmp);
60 	    PUSHs(tmp);
61 	}
62 	PUTBACK;
63 
64 	bsd_globfree(&pglob);
65     }
66 }
67 
68 static void
iterate(pTHX_ bool (* globber)(pTHX_ AV * entries,const char * pat,STRLEN len,bool is_utf8))69 iterate(pTHX_ bool(*globber)(pTHX_ AV *entries, const char *pat, STRLEN len, bool is_utf8))
70 {
71     dSP;
72     dMY_CXT;
73 
74     const char * const cxixpv = (char *)&PL_op;
75     STRLEN const cxixlen = sizeof(OP *);
76     AV *entries;
77     U32 const gimme = GIMME_V;
78     SV *patsv = POPs;
79     bool on_stack = FALSE;
80 
81     if (!MY_CXT.x_GLOB_ENTRIES) MY_CXT.x_GLOB_ENTRIES = newHV();
82     entries = (AV *)*(hv_fetch(MY_CXT.x_GLOB_ENTRIES, cxixpv, cxixlen, 1));
83 
84     /* if we're just beginning, do it all first */
85     if (SvTYPE(entries) != SVt_PVAV) {
86         const char *pat;
87         STRLEN len;
88         bool is_utf8;
89 
90         /* glob without args defaults to $_ */
91         SvGETMAGIC(patsv);
92         if (
93             !SvOK(patsv)
94               && (patsv = DEFSV, SvGETMAGIC(patsv), !SvOK(patsv))
95             ) {
96             pat = "";
97             len = 0;
98             is_utf8 = 0;
99         }
100         else {
101             pat = SvPV_nomg(patsv,len);
102             is_utf8 = !!SvUTF8(patsv);
103             /* the lower-level code expects a null-terminated string */
104             if (!SvPOK(patsv) || pat != SvPVX(patsv) || pat[len] != '\0') {
105                 SV *newpatsv = newSVpvn_flags(pat, len, SVs_TEMP);
106                 pat = SvPV_nomg(newpatsv,len);
107             }
108         }
109 
110         if (!IS_SAFE_SYSCALL(pat, len, "pattern", "glob")) {
111             if (gimme != G_ARRAY)
112                 PUSHs(&PL_sv_undef);
113             PUTBACK;
114             return;
115         }
116 
117 	PUTBACK;
118 	on_stack = globber(aTHX_ entries, pat, len, is_utf8);
119 	SPAGAIN;
120     }
121 
122     /* chuck it all out, quick or slow */
123     if (gimme == G_ARRAY) {
124 	if (!on_stack && AvFILLp(entries) + 1) {
125 	    EXTEND(SP, AvFILLp(entries)+1);
126 	    Copy(AvARRAY(entries), SP+1, AvFILLp(entries)+1, SV *);
127 	    SP += AvFILLp(entries)+1;
128 	}
129 	/* No G_DISCARD here!  It will free the stack items. */
130 	(void)hv_delete(MY_CXT.x_GLOB_ENTRIES, cxixpv, cxixlen, 0);
131     }
132     else {
133 	if (AvFILLp(entries) + 1) {
134 	    mPUSHs(av_shift(entries));
135 	}
136 	else {
137 	    /* return undef for EOL */
138 	    (void)hv_delete(MY_CXT.x_GLOB_ENTRIES, cxixpv, cxixlen, G_DISCARD);
139 	    PUSHs(&PL_sv_undef);
140 	}
141     }
142     PUTBACK;
143 }
144 
145 /* returns true if the items are on the stack already, but only in
146    list context */
147 static bool
csh_glob(pTHX_ AV * entries,const char * pat,STRLEN len,bool is_utf8)148 csh_glob(pTHX_ AV *entries, const char *pat, STRLEN len, bool is_utf8)
149 {
150 	dSP;
151 	AV *patav = NULL;
152 	const char *patend;
153 	const char *s = NULL;
154 	const char *piece = NULL;
155 	SV *word = NULL;
156 	SV *flags_sv = get_sv("File::Glob::DEFAULT_FLAGS", GV_ADD);
157 	int const flags = (int)SvIV(flags_sv);
158 	U32 const gimme = GIMME_V;
159 
160 	patend = pat + len;
161 
162 	assert(SvTYPE(entries) != SVt_PVAV);
163 	sv_upgrade((SV *)entries, SVt_PVAV);
164 
165 	/* extract patterns */
166 	s = pat-1;
167 	while (++s < patend) {
168 	    switch (*s) {
169 	    case '\'':
170 	    case '"' :
171 	      {
172 		bool found = FALSE;
173 		const char quote = *s;
174 		if (!word) {
175 		    word = newSVpvs("");
176 		    if (is_utf8) SvUTF8_on(word);
177 		}
178 		if (piece) sv_catpvn(word, piece, s-piece);
179 		piece = s+1;
180 		while (++s < patend)
181 		    if (*s == '\\') {
182 			s++;
183 			/* If the backslash is here to escape a quote,
184 			   obliterate it. */
185 			if (s < patend && *s == quote)
186 			    sv_catpvn(word, piece, s-piece-1), piece = s;
187 		    }
188 		    else if (*s == quote) {
189 			sv_catpvn(word, piece, s-piece);
190 			piece = NULL;
191 			found = TRUE;
192 			break;
193 		    }
194 		if (!found) { /* unmatched quote */
195 		    /* Give up on tokenisation and treat the whole string
196 		       as a single token, but with whitespace stripped. */
197 		    piece = pat;
198 		    while (isSPACE(*pat)) pat++;
199 		    while (isSPACE(*(patend-1))) patend--;
200 		    /* bsd_glob expects a trailing null, but we cannot mod-
201 		       ify the original */
202 		    if (patend < pat + len) {
203 			if (word) sv_setpvn(word, pat, patend-pat);
204 			else
205 			    word = newSVpvn_flags(
206 				pat, patend-pat, SVf_UTF8*is_utf8
207 			    );
208 			piece = NULL;
209 		    }
210 		    else {
211 			if (word) SvREFCNT_dec(word), word=NULL;
212 			piece = pat;
213 			s = patend;
214 		    }
215 		    goto end_of_parsing;
216 		}
217 		break;
218 	      }
219 	    case '\\':
220 		if (!piece) piece = s;
221 		s++;
222 		/* If the backslash is here to escape a quote,
223 		   obliterate it. */
224 		if (s < patend && (*s == '"' || *s == '\'')) {
225 		    if (!word) {
226 			word = newSVpvn(piece,s-piece-1);
227 			if (is_utf8) SvUTF8_on(word);
228 		    }
229 		    else sv_catpvn(word, piece, s-piece-1);
230 		    piece = s;
231 		}
232 		break;
233 	    default:
234 		if (isSPACE(*s)) {
235 		    if (piece) {
236 			if (!word) {
237 			    word = newSVpvn(piece,s-piece);
238 			    if (is_utf8) SvUTF8_on(word);
239 			}
240 			else sv_catpvn(word, piece, s-piece);
241 		    }
242 		    if (!word) break;
243 		    if (!patav) patav = (AV *)sv_2mortal((SV *)newAV());
244 		    av_push(patav, word);
245 		    word = NULL;
246 		    piece = NULL;
247 		}
248 		else if (!piece) piece = s;
249 		break;
250 	    }
251 	}
252       end_of_parsing:
253 
254 	if (patav) {
255 	    I32 items = AvFILLp(patav) + 1;
256 	    SV **svp = AvARRAY(patav);
257 	    while (items--) {
258 		PUSHMARK(SP);
259 		PUTBACK;
260 		doglob(aTHX_ SvPVXx(*svp++), flags);
261 		SPAGAIN;
262 		{
263 		    dMARK;
264 		    dORIGMARK;
265 		    while (++MARK <= SP)
266 			av_push(entries, SvREFCNT_inc_simple_NN(*MARK));
267 		    SP = ORIGMARK;
268 		}
269 	    }
270 	}
271 	/* piece is set at this point if there is no trailing whitespace.
272 	   It is the beginning of the last token or quote-delimited
273 	   piece thereof.  word is set at this point if the last token has
274 	   multiple quoted pieces. */
275 	if (piece || word) {
276 	    if (word) {
277 		if (piece) sv_catpvn(word, piece, s-piece);
278 		piece = SvPVX(word);
279 	    }
280 	    PUSHMARK(SP);
281 	    PUTBACK;
282 	    doglob(aTHX_ piece, flags);
283 	    if (word) SvREFCNT_dec(word);
284 	    SPAGAIN;
285 	    {
286 		dMARK;
287 		dORIGMARK;
288 		/* short-circuit here for a fairly common case */
289 		if (!patav && gimme == G_ARRAY) { PUTBACK; return TRUE; }
290 		while (++MARK <= SP)
291 		    av_push(entries, SvREFCNT_inc_simple_NN(*MARK));
292 
293 		SP = ORIGMARK;
294 	    }
295 	}
296 	PUTBACK;
297 	return FALSE;
298 }
299 
300 static void
csh_glob_iter(pTHX)301 csh_glob_iter(pTHX)
302 {
303     iterate(aTHX_ csh_glob);
304 }
305 
306 /* wrapper around doglob that can be passed to the iterator */
307 static bool
doglob_iter_wrapper(pTHX_ AV * entries,const char * pattern,STRLEN len,bool is_utf8)308 doglob_iter_wrapper(pTHX_ AV *entries, const char *pattern, STRLEN len, bool is_utf8)
309 {
310     dSP;
311     SV * flags_sv = get_sv("File::Glob::DEFAULT_FLAGS", GV_ADD);
312     int const flags = (int)SvIV(flags_sv);
313 
314     PERL_UNUSED_VAR(len); /* we use \0 termination instead */
315     /* XXX we currently just use the underlying bytes of the passed SV.
316      * Some day someone needs to make glob utf8 aware */
317     PERL_UNUSED_VAR(is_utf8);
318 
319     PUSHMARK(SP);
320     PUTBACK;
321     doglob(aTHX_ pattern, flags);
322     SPAGAIN;
323     {
324 	dMARK;
325 	dORIGMARK;
326 	if (GIMME_V == G_ARRAY) { PUTBACK; return TRUE; }
327 	sv_upgrade((SV *)entries, SVt_PVAV);
328 	while (++MARK <= SP)
329 	    av_push(entries, SvREFCNT_inc_simple_NN(*MARK));
330 	SP = ORIGMARK;
331     }
332     return FALSE;
333 }
334 
335 static void
glob_ophook(pTHX_ OP * o)336 glob_ophook(pTHX_ OP *o)
337 {
338   if (PL_dirty) return;
339   {
340     dMY_CXT;
341     if (MY_CXT.x_GLOB_ENTRIES
342      && (o->op_type == OP_GLOB || o->op_type == OP_ENTERSUB))
343 	(void)hv_delete(MY_CXT.x_GLOB_ENTRIES, (char *)&o, sizeof(OP *),
344 		  G_DISCARD);
345     if (MY_CXT.x_GLOB_OLD_OPHOOK) MY_CXT.x_GLOB_OLD_OPHOOK(aTHX_ o);
346   }
347 }
348 
349 MODULE = File::Glob		PACKAGE = File::Glob
350 
351 int
352 GLOB_ERROR()
353     PREINIT:
354 	dMY_CXT;
355     CODE:
356 	RETVAL = GLOB_ERROR;
357     OUTPUT:
358 	RETVAL
359 
360 void
361 bsd_glob(pattern_sv,...)
362     SV *pattern_sv
363 PREINIT:
364     int flags = 0;
365     char *pattern;
366     STRLEN len;
367 PPCODE:
368     {
369         pattern = SvPV(pattern_sv, len);
370         if (!IS_SAFE_SYSCALL(pattern, len, "pattern", "bsd_glob"))
371             XSRETURN(0);
372 	/* allow for optional flags argument */
373 	if (items > 1) {
374 	    flags = (int) SvIV(ST(1));
375 	    /* remove unsupported flags */
376 	    flags &= ~(GLOB_APPEND | GLOB_DOOFFS | GLOB_ALTDIRFUNC | GLOB_MAGCHAR);
377 	} else {
378 	    SV * flags_sv = get_sv("File::Glob::DEFAULT_FLAGS", GV_ADD);
379 	    flags = (int)SvIV(flags_sv);
380 	}
381 
382 	PUTBACK;
383 	doglob(aTHX_ pattern, flags);
384 	SPAGAIN;
385     }
386 
387 PROTOTYPES: DISABLE
388 void
389 csh_glob(...)
390 PPCODE:
391     /* For backward-compatibility with the original Perl function, we sim-
392      * ply take the first argument, regardless of how many there are.
393      */
394     if (items) SP ++;
395     else {
396 	XPUSHs(&PL_sv_undef);
397     }
398     PUTBACK;
399     csh_glob_iter(aTHX);
400     SPAGAIN;
401 
402 void
403 bsd_glob_override(...)
404 PPCODE:
405     if (items) SP ++;
406     else {
407 	XPUSHs(&PL_sv_undef);
408     }
409     PUTBACK;
410     iterate(aTHX_ doglob_iter_wrapper);
411     SPAGAIN;
412 
413 #ifdef USE_ITHREADS
414 
415 void
416 CLONE(...)
417 INIT:
418     HV *glob_entries_clone = NULL;
419 CODE:
420     PERL_UNUSED_ARG(items);
421     {
422         dMY_CXT;
423         if ( MY_CXT.x_GLOB_ENTRIES ) {
424             CLONE_PARAMS param;
425             param.stashes    = NULL;
426             param.flags      = 0;
427             param.proto_perl = MY_CXT.interp;
428 
429             glob_entries_clone = MUTABLE_HV(sv_dup_inc((SV*)MY_CXT.x_GLOB_ENTRIES, &param));
430         }
431     }
432     {
433         MY_CXT_CLONE;
434         MY_CXT.x_GLOB_ENTRIES = glob_entries_clone;
435         MY_CXT.interp = aTHX;
436     }
437 
438 #endif
439 
440 BOOT:
441 {
442 #ifndef PERL_EXTERNAL_GLOB
443     /* Don't do this at home! The globhook interface is highly volatile. */
444     PL_globhook = csh_glob_iter;
445 #endif
446 }
447 
448 BOOT:
449 {
450     MY_CXT_INIT;
451     {
452 	dMY_CXT;
453 	MY_CXT.x_GLOB_ENTRIES = NULL;
454 	MY_CXT.x_GLOB_OLD_OPHOOK = PL_opfreehook;
455 #ifdef USE_ITHREADS
456         MY_CXT.interp = aTHX;
457 #endif
458 	PL_opfreehook = glob_ophook;
459     }
460 }
461 
462 INCLUDE: const-xs.inc
463