1=head1 NAME
2
3File::Basename - Parse file paths into directory, filename and suffix.
4
5=head1 SYNOPSIS
6
7    use File::Basename;
8
9    ($name,$path,$suffix) = fileparse($fullname,@suffixlist);
10    $name = fileparse($fullname,@suffixlist);
11
12    $basename = basename($fullname,@suffixlist);
13    $dirname  = dirname($fullname);
14
15
16=head1 DESCRIPTION
17
18These routines allow you to parse file paths into their directory, filename
19and suffix.
20
21B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and
22quirks, of the shell and C functions of the same name.  See each
23function's documentation for details.  If your concern is just parsing
24paths it is safer to use L<File::Spec>'s C<splitpath()> and
25C<splitdir()> methods.
26
27It is guaranteed that
28
29    # Where $path_separator is / for Unix, \ for Windows, etc...
30    dirname($path) . $path_separator . basename($path);
31
32is equivalent to the original path for all systems but VMS.
33
34
35=cut
36
37
38package File::Basename;
39
40# File::Basename is used during the Perl build, when the re extension may
41# not be available, but we only actually need it if running under tainting.
42BEGIN {
43  if (${^TAINT}) {
44    require re;
45    re->import('taint');
46  }
47}
48
49
50use strict;
51use 5.006;
52use warnings;
53our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase);
54require Exporter;
55@ISA = qw(Exporter);
56@EXPORT = qw(fileparse fileparse_set_fstype basename dirname);
57$VERSION = "2.85";
58
59fileparse_set_fstype($^O);
60
61
62=over 4
63
64=item C<fileparse>
65X<fileparse>
66
67    my($filename, $dirs, $suffix) = fileparse($path);
68    my($filename, $dirs, $suffix) = fileparse($path, @suffixes);
69    my $filename                  = fileparse($path, @suffixes);
70
71The C<fileparse()> routine divides a file path into its $dirs, $filename
72and (optionally) the filename $suffix.
73
74$dirs contains everything up to and including the last
75directory separator in the $path including the volume (if applicable).
76The remainder of the $path is the $filename.
77
78     # On Unix returns ("baz", "/foo/bar/", "")
79     fileparse("/foo/bar/baz");
80
81     # On Windows returns ("baz", 'C:\foo\bar\', "")
82     fileparse('C:\foo\bar\baz');
83
84     # On Unix returns ("", "/foo/bar/baz/", "")
85     fileparse("/foo/bar/baz/");
86
87If @suffixes are given each element is a pattern (either a string or a
88C<qr//>) matched against the end of the $filename.  The matching
89portion is removed and becomes the $suffix.
90
91     # On Unix returns ("baz", "/foo/bar/", ".txt")
92     fileparse("/foo/bar/baz.txt", qr/\.[^.]*/);
93
94If type is non-Unix (see L</fileparse_set_fstype>) then the pattern
95matching for suffix removal is performed case-insensitively, since
96those systems are not case-sensitive when opening existing files.
97
98You are guaranteed that C<$dirs . $filename . $suffix> will
99denote the same location as the original $path.
100
101=cut
102
103
104sub fileparse {
105  my($fullname,@suffices) = @_;
106
107  unless (defined $fullname) {
108      require Carp;
109      Carp::croak("fileparse(): need a valid pathname");
110  }
111
112  my $orig_type = '';
113  my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);
114
115  my($taint) = substr($fullname,0,0);  # Is $fullname tainted?
116
117  if ($type eq "VMS" and $fullname =~ m{/} ) {
118    # We're doing Unix emulation
119    $orig_type = $type;
120    $type = 'Unix';
121  }
122
123  my($dirpath, $basename);
124
125  if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) {
126    ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s);
127    $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/;
128  }
129  elsif ($type eq "OS2") {
130    ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s);
131    $dirpath = './' unless $dirpath;	# Can't be 0
132    $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#;
133  }
134  elsif ($type eq "MacOS") {
135    ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s);
136    $dirpath = ':' unless $dirpath;
137  }
138  elsif ($type eq "AmigaOS") {
139    ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s);
140    $dirpath = './' unless $dirpath;
141  }
142  elsif ($type eq 'VMS' ) {
143    ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
144    $dirpath ||= '';  # should always be defined
145  }
146  else { # Default to Unix semantics.
147    ($dirpath,$basename) = ($fullname =~ m{^(.*/)?(.*)}s);
148    if ($orig_type eq 'VMS' and $fullname =~ m{^(/[^/]+/000000(/|$))(.*)}) {
149      # dev:[000000] is top of VMS tree, similar to Unix '/'
150      # so strip it off and treat the rest as "normal"
151      my $devspec  = $1;
152      my $remainder = $3;
153      ($dirpath,$basename) = ($remainder =~ m{^(.*/)?(.*)}s);
154      $dirpath ||= '';  # should always be defined
155      $dirpath = $devspec.$dirpath;
156    }
157    $dirpath = './' unless $dirpath;
158  }
159
160
161  my $tail   = '';
162  my $suffix = '';
163  if (@suffices) {
164    foreach $suffix (@suffices) {
165      my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$";
166      if ($basename =~ s/$pat//s) {
167        $taint .= substr($suffix,0,0);
168        $tail = $1 . $tail;
169      }
170    }
171  }
172
173  # Ensure taint is propagated from the path to its pieces.
174  $tail .= $taint;
175  wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
176            : ($basename .= $taint);
177}
178
179
180
181=item C<basename>
182X<basename> X<filename>
183
184    my $filename = basename($path);
185    my $filename = basename($path, @suffixes);
186
187This function is provided for compatibility with the Unix shell command
188C<basename(1)>.  It does B<NOT> always return the file name portion of a
189path as you might expect.  To be safe, if you want the file name portion of
190a path use C<fileparse()>.
191
192C<basename()> returns the last level of a filepath even if the last
193level is clearly directory.  In effect, it is acting like C<pop()> for
194paths.  This differs from C<fileparse()>'s behaviour.
195
196    # Both return "bar"
197    basename("/foo/bar");
198    basename("/foo/bar/");
199
200@suffixes work as in C<fileparse()> except all regex metacharacters are
201quoted.
202
203    # These two function calls are equivalent.
204    my $filename = basename("/foo/bar/baz.txt",  ".txt");
205    my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/);
206
207Also note that in order to be compatible with the shell command,
208C<basename()> does not strip off a suffix if it is identical to the
209remaining characters in the filename.
210
211=cut
212
213
214sub basename {
215  my($path) = shift;
216
217  # From BSD basename(1)
218  # The basename utility deletes any prefix ending with the last slash '/'
219  # character present in string (after first stripping trailing slashes)
220  _strip_trailing_sep($path);
221
222  my($basename, $dirname, $suffix) = fileparse( $path, map("\Q$_\E",@_) );
223
224  # From BSD basename(1)
225  # The suffix is not stripped if it is identical to the remaining
226  # characters in string.
227  if( length $suffix and !length $basename ) {
228      $basename = $suffix;
229  }
230
231  # Ensure that basename '/' == '/'
232  if( !length $basename ) {
233      $basename = $dirname;
234  }
235
236  return $basename;
237}
238
239
240
241=item C<dirname>
242X<dirname>
243
244This function is provided for compatibility with the Unix shell
245command C<dirname(1)> and has inherited some of its quirks.  In spite of
246its name it does B<NOT> always return the directory name as you might
247expect.  To be safe, if you want the directory name of a path use
248C<fileparse()>.
249
250Only on VMS (where there is no ambiguity between the file and directory
251portions of a path) and AmigaOS (possibly due to an implementation quirk in
252this module) does C<dirname()> work like C<fileparse($path)>, returning just the
253$dirs.
254
255    # On VMS and AmigaOS
256    my $dirs = dirname($path);
257
258When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function
259which is subtly different from how C<fileparse()> works.  It returns all but
260the last level of a file path even if the last level is clearly a directory.
261In effect, it is not returning the directory portion but simply the path one
262level up acting like C<chop()> for file paths.
263
264Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on
265its returned path.
266
267    # returns /foo/bar.  fileparse() would return /foo/bar/
268    dirname("/foo/bar/baz");
269
270    # also returns /foo/bar despite the fact that baz is clearly a
271    # directory.  fileparse() would return /foo/bar/baz/
272    dirname("/foo/bar/baz/");
273
274    # returns '.'.  fileparse() would return 'foo/'
275    dirname("foo/");
276
277Under VMS, if there is no directory information in the $path, then the
278current default device and directory is used.
279
280=cut
281
282
283sub dirname {
284    my $path = shift;
285
286    my($type) = $Fileparse_fstype;
287
288    if( $type eq 'VMS' and $path =~ m{/} ) {
289        # Parse as Unix
290        local($File::Basename::Fileparse_fstype) = '';
291        return dirname($path);
292    }
293
294    my($basename, $dirname) = fileparse($path);
295
296    if ($type eq 'VMS') {
297        $dirname ||= $ENV{DEFAULT};
298    }
299    elsif ($type eq 'MacOS') {
300	if( !length($basename) && $dirname !~ /^[^:]+:\z/) {
301            _strip_trailing_sep($dirname);
302	    ($basename,$dirname) = fileparse $dirname;
303	}
304	$dirname .= ":" unless $dirname =~ /:\z/;
305    }
306    elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
307        _strip_trailing_sep($dirname);
308        unless( length($basename) ) {
309	    ($basename,$dirname) = fileparse $dirname;
310	    _strip_trailing_sep($dirname);
311	}
312    }
313    elsif ($type eq 'AmigaOS') {
314        if ( $dirname =~ /:\z/) { return $dirname }
315        chop $dirname;
316        $dirname =~ s{[^:/]+\z}{} unless length($basename);
317    }
318    else {
319        _strip_trailing_sep($dirname);
320        unless( length($basename) ) {
321	    ($basename,$dirname) = fileparse $dirname;
322	    _strip_trailing_sep($dirname);
323	}
324    }
325
326    $dirname;
327}
328
329
330# Strip the trailing path separator.
331sub _strip_trailing_sep  {
332    my $type = $Fileparse_fstype;
333
334    if ($type eq 'MacOS') {
335        $_[0] =~ s/([^:]):\z/$1/s;
336    }
337    elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
338        $_[0] =~ s/([^:])[\\\/]*\z/$1/;
339    }
340    else {
341        $_[0] =~ s{(.)/*\z}{$1}s;
342    }
343}
344
345
346=item C<fileparse_set_fstype>
347X<filesystem>
348
349  my $type = fileparse_set_fstype();
350  my $previous_type = fileparse_set_fstype($type);
351
352Normally File::Basename will assume a file path type native to your current
353operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...).
354With this function you can override that assumption.
355
356Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS",
357"MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility),
358"Epoc" and "Unix" (all case-insensitive).  If an unrecognized $type is
359given "Unix" will be assumed.
360
361If you've selected VMS syntax, and the file specification you pass to
362one of these routines contains a "/", they assume you are using Unix
363emulation and apply the Unix syntax rules instead, for that function
364call only.
365
366=back
367
368=cut
369
370
371BEGIN {
372
373my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc);
374my @Types = (@Ignore_Case, qw(Unix));
375
376sub fileparse_set_fstype {
377    my $old = $Fileparse_fstype;
378
379    if (@_) {
380        my $new_type = shift;
381
382        $Fileparse_fstype = 'Unix';  # default
383        foreach my $type (@Types) {
384            $Fileparse_fstype = $type if $new_type =~ /^$type/i;
385        }
386
387        $Fileparse_igncase =
388          (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0;
389    }
390
391    return $old;
392}
393
394}
395
396
3971;
398
399
400=head1 SEE ALSO
401
402L<dirname(1)>, L<basename(1)>, L<File::Spec>
403