1%% Vendored from hex_core v0.8.2, do not edit manually
2
3%% @private
4%% Copied from https://github.com/erlang/otp/blob/OTP-20.0.1/lib/stdlib/src/erl_tar.erl
5%% with modifications:
6%% - Change module name to `mix_hex_erl_tar`
7%% - Set tar mtimes to 0 and remove dependency on :os.system_time/1
8%% - Preserve modes when building tarball
9%% - Do not crash if failing to write tar
10%% - Allow setting file_info opts on :mix_hex_erl_tar.add
11%% - Add safe_relative_path_links/2 to check directory traversal vulnerability when extracting files,
12%%   it differs from OTP's current fix (2020-02-04) in that it checks regular files instead of
13%%   symlink targets. This allows creating symlinks with relative path targets such as `../tmp/log`
14%% - Remove ram_file usage (backported from OTP master)
15
16%%
17%% %CopyrightBegin%
18%%
19%% Copyright Ericsson AB 1997-2017. All Rights Reserved.
20%%
21%% Licensed under the Apache License, Version 2.0 (the "License");
22%% you may not use this file except in compliance with the License.
23%% You may obtain a copy of the License at
24%%
25%%     http://www.apache.org/licenses/LICENSE-2.0
26%%
27%% Unless required by applicable law or agreed to in writing, software
28%% distributed under the License is distributed on an "AS IS" BASIS,
29%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
30%% See the License for the specific language governing permissions and
31%% limitations under the License.
32%%
33%% %CopyrightEnd%
34%%
35%% This module implements extraction/creation of tar archives.
36%% It supports reading most common tar formats, namely V7, STAR,
37%% USTAR, GNU, BSD/libarchive, and PAX. It produces archives in USTAR
38%% format, unless it must use PAX headers, in which case it produces PAX
39%% format.
40%%
41%% The following references where used:
42%%   http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
43%%   http://www.gnu.org/software/tar/manual/html_node/Standard.html
44%%   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
45-module(mix_hex_erl_tar).
46
47-export([init/3,
48         create/2, create/3,
49         extract/1, extract/2,
50         table/1, table/2, t/1, tt/1,
51         open/2, close/1,
52         add/3, add/4, add/5,
53         format_error/1]).
54
55-include_lib("kernel/include/file.hrl").
56-include_lib("mix_hex_erl_tar.hrl").
57
58%% Converts the short error reason to a descriptive string.
59-spec format_error(term()) -> string().
60format_error(invalid_tar_checksum) ->
61    "Checksum failed";
62format_error(bad_header) ->
63    "Unrecognized tar header format";
64format_error({bad_header, Reason}) ->
65    lists:flatten(io_lib:format("Unrecognized tar header format: ~p", [Reason]));
66format_error({invalid_header, negative_size}) ->
67    "Invalid header: negative size";
68format_error(invalid_sparse_header_size) ->
69    "Invalid sparse header: negative size";
70format_error(invalid_sparse_map_entry) ->
71    "Invalid sparse map entry";
72format_error({invalid_sparse_map_entry, Reason}) ->
73    lists:flatten(io_lib:format("Invalid sparse map entry: ~p", [Reason]));
74format_error(invalid_end_of_archive) ->
75    "Invalid end of archive";
76format_error(eof) ->
77    "Unexpected end of file";
78format_error(integer_overflow) ->
79    "Failed to parse numeric: integer overflow";
80format_error({misaligned_read, Pos}) ->
81    lists:flatten(io_lib:format("Read a block which was misaligned: block_size=~p pos=~p",
82                                [?BLOCK_SIZE, Pos]));
83format_error(invalid_gnu_1_0_sparsemap) ->
84    "Invalid GNU sparse map (version 1.0)";
85format_error({invalid_gnu_0_1_sparsemap, Format}) ->
86    lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format]));
87format_error(unsafe_path) ->
88    "The path points above the current working directory";
89format_error({Name,Reason}) ->
90    lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)]));
91format_error(Atom) when is_atom(Atom) ->
92    file:format_error(Atom);
93format_error(Term) ->
94    lists:flatten(io_lib:format("~tp", [Term])).
95
96%% Initializes a new reader given a custom file handle and I/O wrappers
97-spec init(handle(), write | read, file_op()) -> {ok, reader()} | {error, badarg}.
98init(Handle, AccessMode, Fun) when is_function(Fun, 2) ->
99    Reader = #reader{handle=Handle,access=AccessMode,func=Fun},
100    {ok, Pos, Reader2} = do_position(Reader, {cur, 0}),
101    {ok, Reader2#reader{pos=Pos}};
102init(_Handle, _AccessMode, _Fun) ->
103    {error, badarg}.
104
105%%%================================================================
106%% Extracts all files from the tar file Name.
107-spec extract(open_handle()) -> ok | {error, term()}.
108extract(Name) ->
109    extract(Name, []).
110
111%% Extracts (all) files from the tar file Name.
112%% Options accepted:
113%%  - cooked: Opens the tar file without mode `raw`
114%%  - compressed: Uncompresses the tar file when reading
115%%  - memory: Returns the tar contents as a list of tuples {Name, Bin}
116%%  - keep_old_files: Extracted files will not overwrite the destination
117%%  - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract
118%%  - verbose: Prints verbose information about the extraction,
119%%  - {cwd, AbsoluteDir}: Sets the current working directory for the extraction
120-spec extract(open_handle(), [extract_opt()]) ->
121                     ok
122                         | {ok, [{string(), binary()}]}
123                         | {error, term()}.
124extract({binary, Bin}, Opts) when is_list(Opts) ->
125    do_extract({binary, Bin}, Opts);
126extract({file, Fd}, Opts) when is_list(Opts) ->
127    do_extract({file, Fd}, Opts);
128extract(#reader{}=Reader, Opts) when is_list(Opts) ->
129    do_extract(Reader, Opts);
130extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) ->
131    do_extract(Name, Opts).
132
133do_extract(Handle, Opts) when is_list(Opts) ->
134    Opts2 = extract_opts(Opts),
135    Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end,
136    foldl_read(Handle, fun extract1/4, Acc, Opts2).
137
138extract1(eof, Reader, _, Acc) when is_list(Acc) ->
139    {ok, {ok, lists:reverse(Acc)}, Reader};
140extract1(eof, Reader, _, leading_slash) ->
141    error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
142    {ok, ok, Reader};
143extract1(eof, Reader, _, Acc) ->
144    {ok, Acc, Reader};
145extract1(#tar_header{name=Name,size=Size}=Header, Reader0, Opts, Acc0) ->
146    case check_extract(Name, Opts) of
147        true ->
148            case do_read(Reader0, Size) of
149                {ok, Bin, Reader1} ->
150                    Acc = extract2(Header, Bin, Opts, Acc0),
151                    {ok, Acc, Reader1};
152                {error, _} = Err ->
153                    throw(Err)
154            end;
155        false ->
156            {ok, Acc0, skip_file(Reader0)}
157    end.
158
159extract2(Header, Bin, Opts, Acc) ->
160    case write_extracted_element(Header, Bin, Opts) of
161        ok ->
162            case Header of
163                #tar_header{name="/"++_} ->
164                    leading_slash;
165                #tar_header{} ->
166                    Acc
167            end;
168        {ok, NameBin} when is_list(Acc) ->
169            [NameBin | Acc];
170        {error, _} = Err ->
171            throw(Err)
172    end.
173
174%% Checks if the file Name should be extracted.
175check_extract(_, #read_opts{files=all}) ->
176    true;
177check_extract(Name, #read_opts{files=Files}) ->
178    ordsets:is_element(Name, Files).
179
180%%%================================================================
181%% The following table functions produce a list of information about
182%% the files contained in the archive.
183-type filename() :: string().
184-type typeflag() :: regular | link | symlink |
185                    char | block | directory |
186                    fifo | reserved | unknown.
187-type mode() :: non_neg_integer().
188-type uid() :: non_neg_integer().
189-type gid() :: non_neg_integer().
190
191-type tar_entry() :: {filename(),
192                      typeflag(),
193                      non_neg_integer(),
194                      tar_time(),
195                      mode(),
196                      uid(),
197                      gid()}.
198
199%% Returns a list of names of the files in the tar file Name.
200-spec table(open_handle()) -> {ok, [string()]} | {error, term()}.
201table(Name) ->
202    table(Name, []).
203
204%% Returns a list of names of the files in the tar file Name.
205%% Options accepted: compressed, verbose, cooked.
206-spec table(open_handle(), [compressed | verbose | cooked]) ->
207                   {ok, [tar_entry()]} | {error, term()}.
208table(Name, Opts) when is_list(Opts) ->
209    foldl_read(Name, fun table1/4, [], table_opts(Opts)).
210
211table1(eof, Reader, _, Result) ->
212    {ok, {ok, lists:reverse(Result)}, Reader};
213table1(#tar_header{}=Header, Reader, #read_opts{verbose=Verbose}, Result) ->
214    Attrs = table1_attrs(Header, Verbose),
215    Reader2 = skip_file(Reader),
216    {ok, [Attrs|Result], Reader2}.
217
218%% Extracts attributes relevant to table1's output
219table1_attrs(#tar_header{typeflag=Typeflag,mode=Mode}=Header, true) ->
220    Type = typeflag(Typeflag),
221    Name = Header#tar_header.name,
222    Mtime = Header#tar_header.mtime,
223    Uid = Header#tar_header.uid,
224    Gid = Header#tar_header.gid,
225    Size = Header#tar_header.size,
226    {Name, Type, Size, Mtime, Mode, Uid, Gid};
227table1_attrs(#tar_header{name=Name}, _Verbose) ->
228    Name.
229
230typeflag(?TYPE_REGULAR) -> regular;
231typeflag(?TYPE_REGULAR_A) -> regular;
232typeflag(?TYPE_GNU_SPARSE) -> regular;
233typeflag(?TYPE_CONT) -> regular;
234typeflag(?TYPE_LINK) -> link;
235typeflag(?TYPE_SYMLINK) -> symlink;
236typeflag(?TYPE_CHAR) -> char;
237typeflag(?TYPE_BLOCK) -> block;
238typeflag(?TYPE_DIR) -> directory;
239typeflag(?TYPE_FIFO) -> fifo;
240typeflag(_) -> unknown.
241
242%%%================================================================
243%% Comments for printing the contents of a tape archive,
244%% meant to be invoked from the shell.
245
246%% Prints each filename in the archive
247-spec t(file:filename()) -> ok | {error, term()}.
248t(Name) when is_list(Name); is_binary(Name) ->
249    case table(Name) of
250        {ok, List} ->
251            lists:foreach(fun(N) -> ok = io:format("~ts\n", [N]) end, List);
252        Error ->
253            Error
254    end.
255
256%% Prints verbose information about each file in the archive
257-spec tt(open_handle()) -> ok | {error, term()}.
258tt(Name) ->
259    case table(Name, [verbose]) of
260        {ok, List} ->
261            lists:foreach(fun print_header/1, List);
262        Error ->
263            Error
264    end.
265
266%% Used by tt/1 to print a tar_entry tuple
267-spec print_header(tar_entry()) -> ok.
268print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) ->
269    io:format("~s~s ~4w/~-4w ~7w ~s ~s\n",
270              [type_to_string(Type), mode_to_string(Mode),
271               Uid, Gid, Size, time_to_string(Mtime), Name]).
272
273type_to_string(regular)   -> "-";
274type_to_string(directory) -> "d";
275type_to_string(link)      -> "l";
276type_to_string(symlink)   -> "s";
277type_to_string(char)      -> "c";
278type_to_string(block)     -> "b";
279type_to_string(fifo)      -> "f";
280type_to_string(unknown)   -> "?".
281
282%% Converts a numeric mode to its human-readable representation
283mode_to_string(Mode) ->
284    mode_to_string(Mode, "xwrxwrxwr", []).
285mode_to_string(Mode, [C|T], Acc) when Mode band 1 =:= 1 ->
286    mode_to_string(Mode bsr 1, T, [C|Acc]);
287mode_to_string(Mode, [_|T], Acc) ->
288    mode_to_string(Mode bsr 1, T, [$-|Acc]);
289mode_to_string(_, [], Acc) ->
290    Acc.
291
292%% Converts a tar_time() (POSIX time) to a readable string
293time_to_string(Secs0) ->
294    Epoch = calendar:datetime_to_gregorian_seconds(?EPOCH),
295    Secs = Epoch + Secs0,
296    DateTime0 = calendar:gregorian_seconds_to_datetime(Secs),
297    DateTime = calendar:universal_time_to_local_time(DateTime0),
298    {{Y, Mon, Day}, {H, Min, _}} = DateTime,
299    io_lib:format("~s ~2w ~s:~s ~w", [month(Mon), Day, two_d(H), two_d(Min), Y]).
300
301two_d(N) ->
302    tl(integer_to_list(N + 100)).
303
304month(1) -> "Jan";
305month(2) -> "Feb";
306month(3) -> "Mar";
307month(4) -> "Apr";
308month(5) -> "May";
309month(6) -> "Jun";
310month(7) -> "Jul";
311month(8) -> "Aug";
312month(9) -> "Sep";
313month(10) -> "Oct";
314month(11) -> "Nov";
315month(12) -> "Dec".
316
317%%%================================================================
318%% The open function with friends is to keep the file and binary api of this module
319-type open_handle() :: file:filename()
320                     | {binary, binary()}
321                     | {file, term()}.
322-spec open(open_handle(), [write | compressed | cooked]) ->
323                  {ok, reader()} | {error, term()}.
324open({binary, Bin}, Mode) when is_binary(Bin) ->
325    do_open({binary, Bin}, Mode);
326open({file, Fd}, Mode) ->
327    do_open({file, Fd}, Mode);
328open(Name, Mode) when is_list(Name); is_binary(Name) ->
329    do_open(Name, Mode).
330
331do_open(Name, Mode) when is_list(Mode) ->
332    case open_mode(Mode) of
333        {ok, Access, Raw, Opts} ->
334            open1(Name, Access, Raw, Opts);
335        {error, Reason} ->
336            {error, {Name, Reason}}
337    end.
338
339open1({binary,Bin0}, read, _Raw, Opts) when is_binary(Bin0) ->
340    Bin = case lists:member(compressed, Opts) of
341        true ->
342            try
343              zlib:gunzip(Bin0)
344            catch
345              _:_ -> Bin0
346            end;
347        false ->
348            Bin0
349    end,
350    case file:open(Bin, [ram,binary,read]) of
351        {ok,File} ->
352            {ok, #reader{handle=File,access=read,func=fun file_op/2}};
353        Error ->
354            Error
355    end;
356open1({file, Fd}, read, _Raw, _Opts) ->
357    Reader = #reader{handle=Fd,access=read,func=fun file_op/2},
358    case do_position(Reader, {cur, 0}) of
359        {ok, Pos, Reader2} ->
360            {ok, Reader2#reader{pos=Pos}};
361        {error, _} = Err ->
362            Err
363    end;
364open1(Name, Access, Raw, Opts) when is_list(Name) or is_binary(Name) ->
365    case file:open(Name, Raw ++ [binary, Access|Opts]) of
366        {ok, File} ->
367            {ok, #reader{handle=File,access=Access,func=fun file_op/2}};
368        {error, Reason} ->
369            {error, {Name, Reason}}
370    end.
371
372open_mode(Mode) ->
373    open_mode(Mode, false, [raw], []).
374
375open_mode(read, _, Raw, _) ->
376    {ok, read, Raw, []};
377open_mode(write, _, Raw, _) ->
378    {ok, write, Raw, []};
379open_mode([read|Rest], false, Raw, Opts) ->
380    open_mode(Rest, read, Raw, Opts);
381open_mode([write|Rest], false, Raw, Opts) ->
382    open_mode(Rest, write, Raw, Opts);
383open_mode([compressed|Rest], Access, Raw, Opts) ->
384    open_mode(Rest, Access, Raw, [compressed|Opts]);
385open_mode([cooked|Rest], Access, _Raw, Opts) ->
386    open_mode(Rest, Access, [], Opts);
387open_mode([], Access, Raw, Opts) ->
388    {ok, Access, Raw, Opts};
389open_mode(_, _, _, _) ->
390    {error, einval}.
391
392file_op(write, {Fd, Data}) ->
393    file:write(Fd, Data);
394file_op(position, {Fd, Pos}) ->
395    file:position(Fd, Pos);
396file_op(read2, {Fd, Size}) ->
397    file:read(Fd, Size);
398file_op(close, Fd) ->
399    file:close(Fd).
400
401%% Closes a tar archive.
402-spec close(reader()) -> ok | {error, term()}.
403close(#reader{access=read}=Reader) ->
404    ok = do_close(Reader);
405close(#reader{access=write}=Reader) ->
406    {ok, Reader2} = pad_file(Reader),
407    ok = do_close(Reader2),
408    ok;
409close(_) ->
410    {error, einval}.
411
412pad_file(#reader{pos=Pos}=Reader) ->
413    %% There must be at least two zero blocks at the end.
414    PadCurrent = skip_padding(Pos+?BLOCK_SIZE),
415    Padding = <<0:PadCurrent/unit:8>>,
416    do_write(Reader, [Padding, ?ZERO_BLOCK, ?ZERO_BLOCK]).
417
418
419%%%================================================================
420%% Creation/modification of tar archives
421
422%% Creates a tar file Name containing the given files.
423-spec create(file:filename(), filelist()) -> ok | {error, {string(), term()}}.
424create(Name, FileList) when is_list(Name); is_binary(Name) ->
425    create(Name, FileList, []).
426
427%% Creates a tar archive Name containing the given files.
428%% Accepted options: verbose, compressed, cooked
429-spec create(file:filename(), filelist(), [create_opt()]) ->
430                    ok | {error, term()} | {error, {string(), term()}}.
431create(Name, FileList, Options) when is_list(Name); is_binary(Name) ->
432    Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked)
433                        end, Options),
434    case open(Name, [write|Mode]) of
435        {ok, TarFile} ->
436            do_create(TarFile, FileList, Options);
437        {error, _} = Err ->
438            Err
439    end.
440
441do_create(TarFile, [], _Opts) ->
442    close(TarFile);
443do_create(TarFile, [{NameInArchive, NameOrBin}|Rest], Opts) ->
444    case add(TarFile, NameOrBin, NameInArchive, Opts) of
445        ok ->
446            do_create(TarFile, Rest, Opts);
447        {error, _} = Err ->
448            _ = close(TarFile),
449            Err
450    end;
451do_create(TarFile, [Name|Rest], Opts) ->
452    case add(TarFile, Name, Name, Opts) of
453        ok ->
454            do_create(TarFile, Rest, Opts);
455        {error, _} = Err ->
456            _ = close(TarFile),
457            Err
458    end.
459
460%% Adds a file to a tape archive.
461-type add_type() :: string()
462                  | {string(), string()}
463                  | {string(), binary()}.
464-spec add(reader(), add_type(), [add_opt()]) -> ok | {error, term()}.
465add(Reader, {NameInArchive, Name}, Opts)
466  when is_list(NameInArchive), is_list(Name) ->
467    do_add(Reader, Name, NameInArchive, undefined, Opts);
468add(Reader, {NameInArchive, Bin}, Opts)
469  when is_list(NameInArchive), is_binary(Bin) ->
470    do_add(Reader, Bin, NameInArchive, undefined, Opts);
471add(Reader, {NameInArchive, Bin, Mode}, Opts)
472  when is_list(NameInArchive), is_binary(Bin), is_integer(Mode) ->
473    do_add(Reader, Bin, NameInArchive, Mode, Opts);
474add(Reader, Name, Opts) when is_list(Name) ->
475    do_add(Reader, Name, Name, undefined, Opts).
476
477
478-spec add(reader(), string() | binary(), string(), [add_opt()]) ->
479                 ok | {error, term()}.
480add(Reader, NameOrBin, NameInArchive, Options)
481  when is_list(NameOrBin); is_binary(NameOrBin),
482       is_list(NameInArchive), is_list(Options) ->
483    do_add(Reader, NameOrBin, NameInArchive, undefined, Options).
484
485-spec add(reader(), string() | binary(), string(), integer(), [add_opt()]) ->
486                 ok | {error, term()}.
487add(Reader, NameOrBin, NameInArchive, Mode, Options)
488  when is_list(NameOrBin); is_binary(NameOrBin),
489       is_list(NameInArchive), is_integer(Mode), is_list(Options) ->
490    do_add(Reader, NameOrBin, NameInArchive, Mode, Options).
491
492do_add(#reader{access=write}=Reader, Name, NameInArchive, Mode, Options)
493  when is_list(NameInArchive), is_list(Options) ->
494    RF = fun(F) -> apply_file_info_opts(Options, file:read_link_info(F, [{time, posix}])) end,
495    Opts = #add_opts{read_info=RF},
496    add1(Reader, Name, NameInArchive, Mode, add_opts(Options, Options, Opts));
497do_add(#reader{access=read},_,_,_,_) ->
498    {error, eacces};
499do_add(Reader,_,_,_,_) ->
500    {error, {badarg, Reader}}.
501
502add_opts([dereference|T], AllOptions, Opts) ->
503    RF = fun(F) -> apply_file_info_opts(AllOptions, file:read_file_info(F, [{time, posix}])) end,
504    add_opts(T, AllOptions, Opts#add_opts{read_info=RF});
505add_opts([verbose|T], AllOptions, Opts) ->
506    add_opts(T, AllOptions, Opts#add_opts{verbose=true});
507add_opts([{chunks,N}|T], AllOptions, Opts) ->
508    add_opts(T, AllOptions, Opts#add_opts{chunk_size=N});
509add_opts([{atime,Value}|T], AllOptions, Opts) ->
510    add_opts(T, AllOptions, Opts#add_opts{atime=Value});
511add_opts([{mtime,Value}|T], AllOptions, Opts) ->
512    add_opts(T, AllOptions, Opts#add_opts{mtime=Value});
513add_opts([{ctime,Value}|T], AllOptions, Opts) ->
514    add_opts(T, AllOptions, Opts#add_opts{ctime=Value});
515add_opts([{uid,Value}|T], AllOptions, Opts) ->
516    add_opts(T, AllOptions, Opts#add_opts{uid=Value});
517add_opts([{gid,Value}|T], AllOptions, Opts) ->
518    add_opts(T, AllOptions, Opts#add_opts{gid=Value});
519add_opts([_|T], AllOptions, Opts) ->
520    add_opts(T, AllOptions, Opts);
521add_opts([], _AllOptions, Opts) ->
522    Opts.
523
524apply_file_info_opts(Opts, {ok, FileInfo}) ->
525    {ok, do_apply_file_info_opts(Opts, FileInfo)};
526apply_file_info_opts(_Opts, Other) ->
527    Other.
528
529do_apply_file_info_opts([{atime,Value}|T], FileInfo) ->
530    do_apply_file_info_opts(T, FileInfo#file_info{atime=Value});
531do_apply_file_info_opts([{mtime,Value}|T], FileInfo) ->
532    do_apply_file_info_opts(T, FileInfo#file_info{mtime=Value});
533do_apply_file_info_opts([{ctime,Value}|T], FileInfo) ->
534    do_apply_file_info_opts(T, FileInfo#file_info{ctime=Value});
535do_apply_file_info_opts([{uid,Value}|T], FileInfo) ->
536    do_apply_file_info_opts(T, FileInfo#file_info{uid=Value});
537do_apply_file_info_opts([{gid,Value}|T], FileInfo) ->
538    do_apply_file_info_opts(T, FileInfo#file_info{gid=Value});
539do_apply_file_info_opts([_|T], FileInfo) ->
540    do_apply_file_info_opts(T, FileInfo);
541do_apply_file_info_opts([], FileInfo) ->
542    FileInfo.
543
544add1(#reader{}=Reader, Name, NameInArchive, undefined, #add_opts{read_info=ReadInfo}=Opts)
545  when is_list(Name) ->
546    Res = case ReadInfo(Name) of
547              {error, Reason0} ->
548                  {error, {Name, Reason0}};
549              {ok, #file_info{type=symlink}=Fi} ->
550                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
551                  {ok, Linkname} = file:read_link(Name),
552                  Header = fileinfo_to_header(NameInArchive, Fi, Linkname),
553                  add_header(Reader, Header, Opts);
554              {ok, #file_info{type=regular}=Fi} ->
555                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
556                  Header = fileinfo_to_header(NameInArchive, Fi, false),
557                  {ok, Reader2} = add_header(Reader, Header, Opts),
558                  FileSize = Header#tar_header.size,
559                  {ok, FileSize, Reader3} = do_copy(Reader2, Name, Opts),
560                  Padding = skip_padding(FileSize),
561                  Pad = <<0:Padding/unit:8>>,
562                  do_write(Reader3, Pad);
563              {ok, #file_info{type=directory}=Fi} ->
564                  add_directory(Reader, Name, NameInArchive, Fi, Opts);
565              {ok, #file_info{}=Fi} ->
566                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
567                  Header = fileinfo_to_header(NameInArchive, Fi, false),
568                  add_header(Reader, Header, Opts)
569          end,
570    case Res of
571        ok -> ok;
572        {ok, _Reader} -> ok;
573        {error, _Reason} = Err -> Err
574    end;
575add1(Reader, Bin, NameInArchive, Mode, Opts) when is_binary(Bin) ->
576    add_verbose(Opts, "a ~ts~n", [NameInArchive]),
577    Now = 0,
578    Header = #tar_header{
579                name = NameInArchive,
580                size = byte_size(Bin),
581                typeflag = ?TYPE_REGULAR,
582                atime = add_opts_time(Opts#add_opts.atime, Now),
583                mtime = add_opts_time(Opts#add_opts.mtime, Now),
584                ctime = add_opts_time(Opts#add_opts.ctime, Now),
585                uid = Opts#add_opts.uid,
586                gid = Opts#add_opts.gid,
587                mode = default_mode(Mode, 8#100644)},
588    {ok, Reader2} = add_header(Reader, Header, Opts),
589    Padding = skip_padding(byte_size(Bin)),
590    Data = [Bin, <<0:Padding/unit:8>>],
591    case do_write(Reader2, Data) of
592        {ok, _Reader3} -> ok;
593        {error, Reason} -> {error, {NameInArchive, Reason}}
594    end.
595
596add_opts_time(undefined, _Now) -> 0;
597add_opts_time(Time, _Now) -> Time.
598
599default_mode(undefined, Mode) -> Mode;
600default_mode(Mode, _) -> Mode.
601
602add_directory(Reader, DirName, NameInArchive, Info, Opts) ->
603    case file:list_dir(DirName) of
604        {ok, []} ->
605            add_verbose(Opts, "a ~ts~n", [NameInArchive]),
606            Header = fileinfo_to_header(NameInArchive, Info, false),
607            add_header(Reader, Header, Opts);
608        {ok, Files} ->
609            add_verbose(Opts, "a ~ts~n", [NameInArchive]),
610            try add_files(Reader, Files, DirName, NameInArchive, Opts) of
611                ok -> ok;
612                {error, _} = Err -> Err
613            catch
614                throw:{error, {_Name, _Reason}} = Err -> Err;
615                throw:{error, Reason} -> {error, {DirName, Reason}}
616            end;
617        {error, Reason} ->
618            {error, {DirName, Reason}}
619    end.
620
621add_files(_Reader, [], _Dir, _DirInArchive, _Opts) ->
622    ok;
623add_files(Reader, [Name|Rest], Dir, DirInArchive, #add_opts{read_info=Info}=Opts) ->
624    FullName = filename:join(Dir, Name),
625    NameInArchive = filename:join(DirInArchive, Name),
626    Res = case Info(FullName) of
627              {error, Reason} ->
628                  {error, {FullName, Reason}};
629              {ok, #file_info{type=directory}=Fi} ->
630                  add_directory(Reader, FullName, NameInArchive, Fi, Opts);
631              {ok, #file_info{type=symlink}=Fi} ->
632                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
633                  {ok, Linkname} = file:read_link(FullName),
634                  Header = fileinfo_to_header(NameInArchive, Fi, Linkname),
635                  add_header(Reader, Header, Opts);
636              {ok, #file_info{type=regular}=Fi} ->
637                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
638                  Header = fileinfo_to_header(NameInArchive, Fi, false),
639                  {ok, Reader2} = add_header(Reader, Header, Opts),
640                  FileSize = Header#tar_header.size,
641                  {ok, FileSize, Reader3} = do_copy(Reader2, FullName, Opts),
642                  Padding = skip_padding(FileSize),
643                  Pad = <<0:Padding/unit:8>>,
644                  do_write(Reader3, Pad);
645              {ok, #file_info{}=Fi} ->
646                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
647                  Header = fileinfo_to_header(NameInArchive, Fi, false),
648                  add_header(Reader, Header, Opts)
649          end,
650    case Res of
651        ok -> add_files(Reader, Rest, Dir, DirInArchive, Opts);
652        {ok, ReaderNext} -> add_files(ReaderNext, Rest, Dir, DirInArchive, Opts);
653        {error, _} = Err -> Err
654    end.
655
656format_string(String, Size) when length(String) > Size ->
657    throw({error, {write_string, field_too_long}});
658format_string(String, Size) ->
659    Ascii = to_ascii(String),
660    if byte_size(Ascii) < Size ->
661            [Ascii, 0];
662       true ->
663            Ascii
664    end.
665
666format_octal(Octal) ->
667    iolist_to_binary(io_lib:fwrite("~.8B", [Octal])).
668
669add_header(#reader{}=Reader, #tar_header{}=Header, Opts) ->
670    {ok, Iodata} = build_header(Header, Opts),
671    do_write(Reader, Iodata).
672
673write_to_block(Block, IoData, Start) when is_list(IoData) ->
674    write_to_block(Block, iolist_to_binary(IoData), Start);
675write_to_block(Block, Bin, Start) when is_binary(Bin) ->
676    Size = byte_size(Bin),
677    <<Head:Start/unit:8, _:Size/unit:8, Rest/binary>> = Block,
678    <<Head:Start/unit:8, Bin/binary, Rest/binary>>.
679
680build_header(#tar_header{}=Header, Opts) ->
681    #tar_header{
682       name=Name,
683       mode=Mode,
684       uid=Uid,
685       gid=Gid,
686       size=Size,
687       typeflag=Type,
688       linkname=Linkname,
689       uname=Uname,
690       gname=Gname,
691       devmajor=Devmaj,
692       devminor=Devmin
693      } = Header,
694    Mtime = Header#tar_header.mtime,
695
696    Block0 = ?ZERO_BLOCK,
697    {Block1, Pax0} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, Name, ?PAX_PATH, #{}),
698    Block2 = write_octal(Block1, ?V7_MODE, ?V7_MODE_LEN, Mode),
699    {Block3, Pax1} = write_numeric(Block2, ?V7_UID, ?V7_UID_LEN, Uid, ?PAX_UID, Pax0),
700    {Block4, Pax2} = write_numeric(Block3, ?V7_GID, ?V7_GID_LEN, Gid, ?PAX_GID, Pax1),
701    {Block5, Pax3} = write_numeric(Block4, ?V7_SIZE, ?V7_SIZE_LEN, Size, ?PAX_SIZE, Pax2),
702    {Block6, Pax4} = write_numeric(Block5, ?V7_MTIME, ?V7_MTIME_LEN, Mtime, ?PAX_NONE, Pax3),
703    {Block7, Pax5} = write_string(Block6, ?V7_TYPE, ?V7_TYPE_LEN, <<Type>>, ?PAX_NONE, Pax4),
704    {Block8, Pax6} = write_string(Block7, ?V7_LINKNAME, ?V7_LINKNAME_LEN,
705                                  Linkname, ?PAX_LINKPATH, Pax5),
706    {Block9, Pax7} = write_string(Block8, ?USTAR_UNAME, ?USTAR_UNAME_LEN,
707                                  Uname, ?PAX_UNAME, Pax6),
708    {Block10, Pax8} = write_string(Block9, ?USTAR_GNAME, ?USTAR_GNAME_LEN,
709                                   Gname, ?PAX_GNAME, Pax7),
710    {Block11, Pax9} = write_numeric(Block10, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN,
711                                    Devmaj, ?PAX_NONE, Pax8),
712    {Block12, Pax10} = write_numeric(Block11, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN,
713                                     Devmin, ?PAX_NONE, Pax9),
714    {Block13, Pax11} = set_path(Block12, Pax10),
715    PaxEntry = case maps:size(Pax11) of
716                   0 -> [];
717                   _ -> build_pax_entry(Header, Pax11, Opts)
718               end,
719    Block14 = set_format(Block13, ?FORMAT_USTAR),
720    Block15 = set_checksum(Block14),
721    {ok, [PaxEntry, Block15]}.
722
723set_path(Block0, Pax) ->
724     %% only use ustar header when name is too long
725    case maps:get(?PAX_PATH, Pax, nil) of
726        nil ->
727            {Block0, Pax};
728        PaxPath ->
729            case split_ustar_path(PaxPath) of
730                {ok, UstarName, UstarPrefix} ->
731                    {Block1, _} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN,
732                                               UstarName, ?PAX_NONE, #{}),
733                    {Block2, _} = write_string(Block1, ?USTAR_PREFIX, ?USTAR_PREFIX_LEN,
734                                               UstarPrefix, ?PAX_NONE, #{}),
735                    {Block2, maps:remove(?PAX_PATH, Pax)};
736                false ->
737                    {Block0, Pax}
738            end
739    end.
740
741set_format(Block0, Format)
742  when Format =:= ?FORMAT_USTAR; Format =:= ?FORMAT_PAX ->
743    Block1 = write_to_block(Block0, ?MAGIC_USTAR, ?USTAR_MAGIC),
744    write_to_block(Block1, ?VERSION_USTAR, ?USTAR_VERSION);
745set_format(_Block, Format) ->
746    throw({error, {invalid_format, Format}}).
747
748set_checksum(Block) ->
749    Checksum = compute_checksum(Block),
750    write_octal(Block, ?V7_CHKSUM, ?V7_CHKSUM_LEN, Checksum).
751
752build_pax_entry(Header, PaxAttrs, Opts) ->
753    Path = Header#tar_header.name,
754    Filename = filename:basename(Path),
755    Dir = filename:dirname(Path),
756    Path2 = filename:join([Dir, "PaxHeaders.0", Filename]),
757    AsciiPath = to_ascii(Path2),
758    Path3 = if byte_size(AsciiPath) > ?V7_NAME_LEN ->
759                    binary_part(AsciiPath, 0, ?V7_NAME_LEN - 1);
760               true ->
761                    AsciiPath
762            end,
763    Keys = maps:keys(PaxAttrs),
764    SortedKeys = lists:sort(Keys),
765    PaxFile = build_pax_file(SortedKeys, PaxAttrs),
766    Size = byte_size(PaxFile),
767    Padding = (?BLOCK_SIZE -
768                   (byte_size(PaxFile) rem ?BLOCK_SIZE)) rem ?BLOCK_SIZE,
769    Pad = <<0:Padding/unit:8>>,
770    PaxHeader = #tar_header{
771                   name=unicode:characters_to_list(Path3),
772                   size=Size,
773                   mtime=Header#tar_header.mtime,
774                   atime=Header#tar_header.atime,
775                   ctime=Header#tar_header.ctime,
776                   typeflag=?TYPE_X_HEADER
777                  },
778    {ok, PaxHeaderData} = build_header(PaxHeader, Opts),
779    [PaxHeaderData, PaxFile, Pad].
780
781build_pax_file(Keys, PaxAttrs) ->
782    build_pax_file(Keys, PaxAttrs, []).
783build_pax_file([], _, Acc) ->
784    unicode:characters_to_binary(Acc);
785build_pax_file([K|Rest], Attrs, Acc) ->
786    V = maps:get(K, Attrs),
787    Size = sizeof(K) + sizeof(V) + 3,
788    Size2 = sizeof(Size) + Size,
789    Key = to_string(K),
790    Value = to_string(V),
791    Record = unicode:characters_to_binary(io_lib:format("~B ~ts=~ts\n", [Size2, Key, Value])),
792    if byte_size(Record) =/= Size2 ->
793            Size3 = byte_size(Record),
794            Record2 = io_lib:format("~B ~ts=~ts\n", [Size3, Key, Value]),
795            build_pax_file(Rest, Attrs, [Acc, Record2]);
796       true ->
797            build_pax_file(Rest, Attrs, [Acc, Record])
798    end.
799
800sizeof(Bin) when is_binary(Bin) ->
801    byte_size(Bin);
802sizeof(List) when is_list(List) ->
803    length(List);
804sizeof(N) when is_integer(N) ->
805    byte_size(integer_to_binary(N));
806sizeof(N) when is_float(N) ->
807    byte_size(float_to_binary(N)).
808
809to_string(Bin) when is_binary(Bin) ->
810    unicode:characters_to_list(Bin);
811to_string(List) when is_list(List) ->
812    List;
813to_string(N) when is_integer(N) ->
814    integer_to_list(N);
815to_string(N) when is_float(N) ->
816    float_to_list(N).
817
818split_ustar_path(Path) ->
819    Len = length(Path),
820    NotAscii = not is_ascii(Path),
821    if Len =< ?V7_NAME_LEN; NotAscii ->
822            false;
823       true ->
824            PathBin = binary:list_to_bin(Path),
825            case binary:split(PathBin, [<<$/>>], [global, trim_all]) of
826                [Part] when byte_size(Part) >= ?V7_NAME_LEN ->
827                    false;
828                Parts ->
829                    case lists:last(Parts) of
830                        Name when byte_size(Name) >= ?V7_NAME_LEN ->
831                            false;
832                        Name ->
833                            Parts2 = lists:sublist(Parts, length(Parts) - 1),
834                            join_split_ustar_path(Parts2, {ok, Name, nil})
835                    end
836            end
837    end.
838
839join_split_ustar_path([], Acc) ->
840    Acc;
841join_split_ustar_path([Part|_], {ok, _, nil})
842  when byte_size(Part) > ?USTAR_PREFIX_LEN ->
843    false;
844join_split_ustar_path([Part|_], {ok, _Name, Acc})
845  when (byte_size(Part)+byte_size(Acc)) > ?USTAR_PREFIX_LEN ->
846    false;
847join_split_ustar_path([Part|Rest], {ok, Name, nil}) ->
848    join_split_ustar_path(Rest, {ok, Name, Part});
849join_split_ustar_path([Part|Rest], {ok, Name, Acc}) ->
850    join_split_ustar_path(Rest, {ok, Name, <<Acc/binary,$/,Part/binary>>}).
851
852write_octal(Block, Pos, Size, X) ->
853    Octal = zero_pad(format_octal(X), Size-1),
854    if byte_size(Octal) < Size ->
855            write_to_block(Block, Octal, Pos);
856       true ->
857            throw({error, {write_failed, octal_field_too_long}})
858    end.
859
860write_string(Block, Pos, Size, Str, PaxAttr, Pax0) ->
861    NotAscii = not is_ascii(Str),
862    if PaxAttr =/= ?PAX_NONE andalso (length(Str) > Size orelse NotAscii) ->
863            Pax1 = maps:put(PaxAttr, Str, Pax0),
864            {Block, Pax1};
865       true ->
866            Formatted = format_string(Str, Size),
867            {write_to_block(Block, Formatted, Pos), Pax0}
868    end.
869write_numeric(Block, Pos, Size, X, PaxAttr, Pax0) ->
870    %% attempt octal
871    Octal = zero_pad(format_octal(X), Size-1),
872    if byte_size(Octal) < Size ->
873            {write_to_block(Block, [Octal, 0], Pos), Pax0};
874       PaxAttr =/= ?PAX_NONE ->
875            Pax1 = maps:put(PaxAttr, X, Pax0),
876            {Block, Pax1};
877       true ->
878            throw({error, {write_failed, numeric_field_too_long}})
879    end.
880
881zero_pad(Str, Size) when byte_size(Str) >= Size ->
882    Str;
883zero_pad(Str, Size) ->
884    Padding = Size - byte_size(Str),
885    Pad = binary:copy(<<$0>>, Padding),
886    <<Pad/binary, Str/binary>>.
887
888
889%%%================================================================
890%% Functions for creating or modifying tar archives
891
892read_block(Reader) ->
893    case do_read(Reader, ?BLOCK_SIZE) of
894        eof ->
895            throw({error, eof});
896        %% Two zero blocks mark the end of the archive
897        {ok, ?ZERO_BLOCK, Reader1} ->
898            case do_read(Reader1, ?BLOCK_SIZE) of
899                eof ->
900                    % This is technically a malformed end-of-archive marker,
901                    % as two ZERO_BLOCKs are expected as the marker,
902                    % but if we've already made it this far, we should just ignore it
903                    eof;
904                {ok, ?ZERO_BLOCK, _Reader2} ->
905                    eof;
906                {ok, _Block, _Reader2} ->
907                    throw({error, invalid_end_of_archive});
908                {error,_} = Err ->
909                    throw(Err)
910            end;
911        {ok, Block, Reader1} when is_binary(Block) ->
912            {ok, Block, Reader1};
913        {error, _} = Err ->
914            throw(Err)
915    end.
916
917get_header(#reader{}=Reader) ->
918    case read_block(Reader) of
919        eof ->
920            eof;
921        {ok, Block, Reader1} ->
922            convert_header(Block, Reader1)
923    end.
924
925%% Converts the tar header to a record.
926to_v7(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
927    #header_v7{
928       name=binary_part(Bin, ?V7_NAME, ?V7_NAME_LEN),
929       mode=binary_part(Bin, ?V7_MODE, ?V7_MODE_LEN),
930       uid=binary_part(Bin, ?V7_UID, ?V7_UID_LEN),
931       gid=binary_part(Bin, ?V7_GID, ?V7_GID_LEN),
932       size=binary_part(Bin, ?V7_SIZE, ?V7_SIZE_LEN),
933       mtime=binary_part(Bin, ?V7_MTIME, ?V7_MTIME_LEN),
934       checksum=binary_part(Bin, ?V7_CHKSUM, ?V7_CHKSUM_LEN),
935       typeflag=binary:at(Bin, ?V7_TYPE),
936       linkname=binary_part(Bin, ?V7_LINKNAME, ?V7_LINKNAME_LEN)
937      };
938to_v7(_) ->
939    {error, header_block_too_small}.
940
941to_gnu(#header_v7{}=V7, Bin)
942  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
943    #header_gnu{
944       header_v7=V7,
945       magic=binary_part(Bin, ?GNU_MAGIC, ?GNU_MAGIC_LEN),
946       version=binary_part(Bin, ?GNU_VERSION, ?GNU_VERSION_LEN),
947       uname=binary_part(Bin, 265, 32),
948       gname=binary_part(Bin, 297, 32),
949       devmajor=binary_part(Bin, 329, 8),
950       devminor=binary_part(Bin, 337, 8),
951       atime=binary_part(Bin, 345, 12),
952       ctime=binary_part(Bin, 357, 12),
953       sparse=to_sparse_array(binary_part(Bin, 386, 24*4+1)),
954       real_size=binary_part(Bin, 483, 12)
955      }.
956
957to_star(#header_v7{}=V7, Bin)
958  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
959    #header_star{
960       header_v7=V7,
961       magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
962       version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN),
963       uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN),
964       gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN),
965       devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
966       devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
967       prefix=binary_part(Bin, 345, 131),
968       atime=binary_part(Bin, 476, 12),
969       ctime=binary_part(Bin, 488, 12),
970       trailer=binary_part(Bin, ?STAR_TRAILER, ?STAR_TRAILER_LEN)
971      }.
972
973to_ustar(#header_v7{}=V7, Bin)
974  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
975    #header_ustar{
976       header_v7=V7,
977       magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
978       version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN),
979       uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN),
980       gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN),
981       devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
982       devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
983       prefix=binary_part(Bin, 345, 155)
984      }.
985
986to_sparse_array(Bin) when is_binary(Bin) ->
987    MaxEntries = byte_size(Bin) div 24,
988    IsExtended = 1 =:= binary:at(Bin, 24*MaxEntries),
989    Entries = parse_sparse_entries(Bin, MaxEntries-1, []),
990    #sparse_array{
991       entries=Entries,
992       max_entries=MaxEntries,
993       is_extended=IsExtended
994      }.
995
996parse_sparse_entries(<<>>, _, Acc) ->
997    Acc;
998parse_sparse_entries(_, -1, Acc) ->
999    Acc;
1000parse_sparse_entries(Bin, N, Acc) ->
1001    case to_sparse_entry(binary_part(Bin, N*24, 24)) of
1002        nil ->
1003            parse_sparse_entries(Bin, N-1, Acc);
1004        Entry = #sparse_entry{} ->
1005            parse_sparse_entries(Bin, N-1, [Entry|Acc])
1006    end.
1007
1008-define(EMPTY_ENTRY, <<0,0,0,0,0,0,0,0,0,0,0,0>>).
1009to_sparse_entry(Bin) when is_binary(Bin), byte_size(Bin) =:= 24 ->
1010    OffsetBin = binary_part(Bin, 0, 12),
1011    NumBytesBin = binary_part(Bin, 12, 12),
1012    case {OffsetBin, NumBytesBin} of
1013        {?EMPTY_ENTRY, ?EMPTY_ENTRY} ->
1014            nil;
1015        _ ->
1016            #sparse_entry{
1017               offset=parse_numeric(OffsetBin),
1018               num_bytes=parse_numeric(NumBytesBin)}
1019    end.
1020
1021-spec get_format(binary()) -> {ok, pos_integer(), header_v7()}
1022                                  | ?FORMAT_UNKNOWN
1023                                  | {error, term()}.
1024get_format(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
1025    do_get_format(to_v7(Bin), Bin).
1026
1027do_get_format({error, _} = Err, _Bin) ->
1028    Err;
1029do_get_format(#header_v7{}=V7, Bin)
1030  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
1031    Checksum = parse_octal(V7#header_v7.checksum),
1032    Chk1 = compute_checksum(Bin),
1033    Chk2 = compute_signed_checksum(Bin),
1034    if Checksum =/= Chk1 andalso Checksum =/= Chk2 ->
1035            ?FORMAT_UNKNOWN;
1036       true ->
1037            %% guess magic
1038            Ustar = to_ustar(V7, Bin),
1039            Star = to_star(V7, Bin),
1040            Magic = Ustar#header_ustar.magic,
1041            Version = Ustar#header_ustar.version,
1042            Trailer = Star#header_star.trailer,
1043            Format = if
1044                         Magic =:= ?MAGIC_USTAR, Trailer =:= ?TRAILER_STAR ->
1045                             ?FORMAT_STAR;
1046                         Magic =:= ?MAGIC_USTAR ->
1047                             ?FORMAT_USTAR;
1048                         Magic =:= ?MAGIC_GNU, Version =:= ?VERSION_GNU ->
1049                             ?FORMAT_GNU;
1050                         true ->
1051                             ?FORMAT_V7
1052                     end,
1053            {ok, Format, V7}
1054    end.
1055
1056unpack_format(Format, #header_v7{}=V7, Bin, Reader)
1057  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
1058    Mtime = parse_numeric(V7#header_v7.mtime),
1059    Header0 = #tar_header{
1060                 name=parse_string(V7#header_v7.name),
1061                 mode=parse_numeric(V7#header_v7.mode),
1062                 uid=parse_numeric(V7#header_v7.uid),
1063                 gid=parse_numeric(V7#header_v7.gid),
1064                 size=parse_numeric(V7#header_v7.size),
1065                 mtime=Mtime,
1066                 atime=Mtime,
1067                 ctime=Mtime,
1068                 typeflag=V7#header_v7.typeflag,
1069                 linkname=parse_string(V7#header_v7.linkname)
1070                },
1071    Typeflag = Header0#tar_header.typeflag,
1072    Header1 = if Format > ?FORMAT_V7 ->
1073                      unpack_modern(Format, V7, Bin, Header0);
1074                 true ->
1075                      Name = Header0#tar_header.name,
1076                      Header0#tar_header{name=safe_join_path("", Name)}
1077              end,
1078    HeaderOnly = is_header_only_type(Typeflag),
1079    Header2 = if HeaderOnly ->
1080                      Header1#tar_header{size=0};
1081                 true ->
1082                      Header1
1083              end,
1084    if Typeflag =:= ?TYPE_GNU_SPARSE ->
1085            Gnu = to_gnu(V7, Bin),
1086            RealSize = parse_numeric(Gnu#header_gnu.real_size),
1087            {Sparsemap, Reader2} = parse_sparse_map(Gnu, Reader),
1088            Header3 = Header2#tar_header{size=RealSize},
1089            {Header3, new_sparse_file_reader(Reader2, Sparsemap, RealSize)};
1090       true ->
1091            FileReader = #reg_file_reader{
1092                            handle=Reader,
1093                            num_bytes=Header2#tar_header.size,
1094                            size=Header2#tar_header.size,
1095                            pos = 0
1096                           },
1097            {Header2, FileReader}
1098    end.
1099
1100unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0)
1101  when is_binary(Bin) ->
1102    Typeflag = Header0#tar_header.typeflag,
1103    Ustar = to_ustar(V7, Bin),
1104    H0 = Header0#tar_header{
1105            uname=parse_string(Ustar#header_ustar.uname),
1106            gname=parse_string(Ustar#header_ustar.gname)},
1107    H1 = if Typeflag =:= ?TYPE_CHAR
1108            orelse Typeflag =:= ?TYPE_BLOCK ->
1109                Ma = parse_numeric(Ustar#header_ustar.devmajor),
1110                Mi = parse_numeric(Ustar#header_ustar.devminor),
1111                H0#tar_header{
1112                    devmajor=Ma,
1113                    devminor=Mi
1114                };
1115            true ->
1116                H0
1117        end,
1118    {Prefix, H2} = case Format of
1119                        ?FORMAT_USTAR ->
1120                            {parse_string(Ustar#header_ustar.prefix), H1};
1121                        ?FORMAT_STAR ->
1122                            Star = to_star(V7, Bin),
1123                            Prefix0 = parse_string(Star#header_star.prefix),
1124                            Atime0 = Star#header_star.atime,
1125                            Atime = parse_numeric(Atime0),
1126                            Ctime0 = Star#header_star.ctime,
1127                            Ctime = parse_numeric(Ctime0),
1128                            {Prefix0, H1#tar_header{
1129                                        atime=Atime,
1130                                        ctime=Ctime
1131                                    }};
1132                        _ ->
1133                            {"", H1}
1134                    end,
1135    Name = H2#tar_header.name,
1136    H2#tar_header{name=safe_join_path(Prefix, Name)}.
1137
1138
1139safe_join_path([], Name) ->
1140    filename:join([Name]);
1141safe_join_path(Prefix, []) ->
1142    filename:join([Prefix]);
1143safe_join_path(Prefix, Name) ->
1144    filename:join(Prefix, Name).
1145
1146new_sparse_file_reader(Reader, Sparsemap, RealSize) ->
1147    true = validate_sparse_entries(Sparsemap, RealSize),
1148    #sparse_file_reader{
1149       handle = Reader,
1150       num_bytes = RealSize,
1151       pos = 0,
1152       size = RealSize,
1153       sparse_map = Sparsemap}.
1154
1155validate_sparse_entries(Entries, RealSize) ->
1156    validate_sparse_entries(Entries, RealSize, 0, 0).
1157validate_sparse_entries([], _RealSize, _I, _LastOffset) ->
1158    true;
1159validate_sparse_entries([#sparse_entry{}=Entry|Rest], RealSize, I, LastOffset) ->
1160    Offset = Entry#sparse_entry.offset,
1161    NumBytes = Entry#sparse_entry.num_bytes,
1162    if
1163        Offset > ?MAX_INT64-NumBytes ->
1164            throw({error, {invalid_sparse_map_entry, offset_too_large}});
1165        Offset+NumBytes > RealSize ->
1166            throw({error, {invalid_sparse_map_entry, offset_too_large}});
1167        I > 0 andalso LastOffset > Offset ->
1168            throw({error, {invalid_sparse_map_entry, overlapping_offsets}});
1169        true ->
1170            ok
1171    end,
1172    validate_sparse_entries(Rest, RealSize, I+1, Offset+NumBytes).
1173
1174
1175-spec parse_sparse_map(header_gnu(), reader_type()) ->
1176                              {[sparse_entry()], reader_type()}.
1177parse_sparse_map(#header_gnu{sparse=Sparse}, Reader)
1178  when Sparse#sparse_array.is_extended ->
1179    parse_sparse_map(Sparse, Reader, []);
1180parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) ->
1181    {Sparse#sparse_array.entries, Reader}.
1182parse_sparse_map(#sparse_array{is_extended=true,entries=Entries}, Reader, Acc) ->
1183    case read_block(Reader) of
1184        eof ->
1185            throw({error, eof});
1186        {ok, Block, Reader2} ->
1187            Sparse2 = to_sparse_array(Block),
1188            parse_sparse_map(Sparse2, Reader2, Entries++Acc)
1189    end;
1190parse_sparse_map(#sparse_array{entries=Entries}, Reader, Acc) ->
1191    Sorted = lists:sort(fun (#sparse_entry{offset=A},#sparse_entry{offset=B}) ->
1192                                A =< B
1193                        end, Entries++Acc),
1194    {Sorted, Reader}.
1195
1196%% Defined by taking the sum of the unsigned byte values of the
1197%% entire header record, treating the checksum bytes to as ASCII spaces
1198compute_checksum(<<H1:?V7_CHKSUM/binary,
1199                   H2:?V7_CHKSUM_LEN/binary,
1200                   Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
1201                   _/binary>>) ->
1202    C0 = checksum(H1) + (byte_size(H2) * $\s),
1203    C1 = checksum(Rest),
1204    C0 + C1.
1205
1206compute_signed_checksum(<<H1:?V7_CHKSUM/binary,
1207                          H2:?V7_CHKSUM_LEN/binary,
1208                          Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
1209                          _/binary>>) ->
1210    C0 = signed_checksum(H1) + (byte_size(H2) * $\s),
1211    C1 = signed_checksum(Rest),
1212    C0 + C1.
1213
1214%% Returns the checksum of a binary.
1215checksum(Bin) -> checksum(Bin, 0).
1216checksum(<<A/unsigned,Rest/binary>>, Sum) ->
1217    checksum(Rest, Sum+A);
1218checksum(<<>>, Sum) -> Sum.
1219
1220signed_checksum(Bin) -> signed_checksum(Bin, 0).
1221signed_checksum(<<A/signed,Rest/binary>>, Sum) ->
1222    signed_checksum(Rest, Sum+A);
1223signed_checksum(<<>>, Sum) -> Sum.
1224
1225-spec parse_numeric(binary()) -> non_neg_integer().
1226parse_numeric(<<>>) ->
1227    0;
1228parse_numeric(<<First, _/binary>> = Bin) ->
1229    %% check for base-256 format first
1230    %% if the bit is set, then all following bits constitute a two's
1231    %% complement encoded number in big-endian byte order
1232    if
1233        First band 16#80 =/= 0 ->
1234            %% Handling negative numbers relies on the following identity:
1235            %%     -a-1 == ^a
1236            %% If the number is negative, we use an inversion mask to invert
1237            %% the data bytes and treat the value as an unsigned number
1238            Inv = if First band 16#40 =/= 0 -> 16#00; true -> 16#FF end,
1239            Bytes = binary:bin_to_list(Bin),
1240            Reducer = fun (C, {I, X}) ->
1241                              C1 = C bxor Inv,
1242                              C2 = if I =:= 0 -> C1 band 16#7F; true -> C1 end,
1243                              if (X bsr 56) > 0 ->
1244                                      throw({error,integer_overflow});
1245                                 true ->
1246                                      {I+1, (X bsl 8) bor C2}
1247                              end
1248                      end,
1249            {_, N} = lists:foldl(Reducer, {0,0}, Bytes),
1250            if (N bsr 63) > 0 ->
1251                    throw({error, integer_overflow});
1252               true ->
1253                    if Inv =:= 16#FF ->
1254                            -1 bxor N;
1255                       true ->
1256                            N
1257                    end
1258            end;
1259        true ->
1260            %% normal case is an octal number
1261            parse_octal(Bin)
1262    end.
1263
1264parse_octal(Bin) when is_binary(Bin) ->
1265    %% skip leading/trailing zero bytes and spaces
1266    do_parse_octal(Bin, <<>>).
1267do_parse_octal(<<>>, <<>>) ->
1268    0;
1269do_parse_octal(<<>>, Acc) ->
1270    case io_lib:fread("~8u", binary:bin_to_list(Acc)) of
1271        {error, _} -> throw({error, invalid_tar_checksum});
1272        {ok, [Octal], []} -> Octal;
1273        {ok, _, _} -> throw({error, invalid_tar_checksum})
1274    end;
1275do_parse_octal(<<$\s,Rest/binary>>, Acc) ->
1276    do_parse_octal(Rest, Acc);
1277do_parse_octal(<<0, Rest/binary>>, Acc) ->
1278    do_parse_octal(Rest, Acc);
1279do_parse_octal(<<C, Rest/binary>>, Acc) ->
1280    do_parse_octal(Rest, <<Acc/binary, C>>).
1281
1282parse_string(Bin) when is_binary(Bin) ->
1283    do_parse_string(Bin, <<>>).
1284do_parse_string(<<>>, Acc) ->
1285    case unicode:characters_to_list(Acc) of
1286        Str when is_list(Str) ->
1287            Str;
1288        {incomplete, _Str, _Rest} ->
1289            binary:bin_to_list(Acc);
1290        {error, _Str, _Rest} ->
1291            throw({error, {bad_header, invalid_string}})
1292    end;
1293do_parse_string(<<0, _/binary>>, Acc) ->
1294    do_parse_string(<<>>, Acc);
1295do_parse_string(<<C, Rest/binary>>, Acc) ->
1296    do_parse_string(Rest, <<Acc/binary, C>>).
1297
1298convert_header(Bin, #reader{pos=Pos}=Reader)
1299  when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 ->
1300    case get_format(Bin) of
1301        ?FORMAT_UNKNOWN ->
1302            throw({error, bad_header});
1303        {ok, Format, V7} ->
1304            unpack_format(Format, V7, Bin, Reader);
1305        {error, Reason} ->
1306            throw({error, {bad_header, Reason}})
1307    end;
1308convert_header(Bin, #reader{pos=Pos}) when byte_size(Bin) =:= ?BLOCK_SIZE ->
1309    throw({error, misaligned_read, Pos});
1310convert_header(Bin, _Reader) when byte_size(Bin) =:= 0 ->
1311    eof;
1312convert_header(_Bin, _Reader) ->
1313    throw({error, eof}).
1314
1315%% Creates a partially-populated header record based
1316%% on the provided file_info record. If the file is
1317%% a symlink, then `link` is used as the link target.
1318%% If the file is a directory, a slash is appended to the name.
1319fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) ->
1320    BaseHeader = #tar_header{name=Name,
1321                             mtime=0,
1322                             atime=0,
1323                             ctime=0,
1324                             mode=Fi#file_info.mode,
1325                             typeflag=?TYPE_REGULAR},
1326    do_fileinfo_to_header(BaseHeader, Fi, Link).
1327
1328do_fileinfo_to_header(Header, #file_info{size=Size,type=regular}, _Link) ->
1329    Header#tar_header{size=Size,typeflag=?TYPE_REGULAR};
1330do_fileinfo_to_header(#tar_header{name=Name}=Header,
1331                      #file_info{type=directory}, _Link) ->
1332    Header#tar_header{name=Name++"/",typeflag=?TYPE_DIR};
1333do_fileinfo_to_header(Header, #file_info{type=symlink}, Link) ->
1334    Header#tar_header{typeflag=?TYPE_SYMLINK,linkname=Link};
1335do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link)
1336  when (Mode band ?S_IFMT) =:= ?S_IFCHR ->
1337    Header#tar_header{typeflag=?TYPE_CHAR,
1338                      devmajor=Fi#file_info.major_device,
1339                      devminor=Fi#file_info.minor_device};
1340do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link)
1341  when (Mode band ?S_IFMT) =:= ?S_IFBLK ->
1342    Header#tar_header{typeflag=?TYPE_BLOCK,
1343                      devmajor=Fi#file_info.major_device,
1344                      devminor=Fi#file_info.minor_device};
1345do_fileinfo_to_header(Header, #file_info{type=other,mode=Mode}, _Link)
1346  when (Mode band ?S_IFMT) =:= ?S_FIFO ->
1347    Header#tar_header{typeflag=?TYPE_FIFO};
1348do_fileinfo_to_header(Header, Fi, _Link) ->
1349    {error, {invalid_file_type, Header#tar_header.name, Fi}}.
1350
1351is_ascii(Str) when is_list(Str) ->
1352    not lists:any(fun (Char) -> Char >= 16#80 end, Str);
1353is_ascii(Bin) when is_binary(Bin) ->
1354    is_ascii1(Bin).
1355
1356is_ascii1(<<>>) ->
1357    true;
1358is_ascii1(<<C,_Rest/binary>>) when C >= 16#80 ->
1359    false;
1360is_ascii1(<<_, Rest/binary>>) ->
1361    is_ascii1(Rest).
1362
1363to_ascii(Str) when is_list(Str) ->
1364    case is_ascii(Str) of
1365        true ->
1366            unicode:characters_to_binary(Str);
1367        false ->
1368            Chars = lists:filter(fun (Char) -> Char < 16#80 end, Str),
1369            unicode:characters_to_binary(Chars)
1370    end;
1371to_ascii(Bin) when is_binary(Bin) ->
1372    to_ascii(Bin, <<>>).
1373to_ascii(<<>>, Acc) ->
1374    Acc;
1375to_ascii(<<C, Rest/binary>>, Acc) when C < 16#80 ->
1376    to_ascii(Rest, <<Acc/binary,C>>);
1377to_ascii(<<_, Rest/binary>>, Acc) ->
1378    to_ascii(Rest, Acc).
1379
1380is_header_only_type(?TYPE_SYMLINK) -> true;
1381is_header_only_type(?TYPE_LINK)    -> true;
1382is_header_only_type(?TYPE_DIR)     -> true;
1383is_header_only_type(_) -> false.
1384
1385foldl_read(#reader{access=read}=Reader, Fun, Accu, #read_opts{}=Opts)
1386  when is_function(Fun,4) ->
1387    case foldl_read0(Reader, Fun, Accu, Opts) of
1388        {ok, Result, _Reader2} ->
1389            Result;
1390        {error, _} = Err ->
1391            Err
1392    end;
1393foldl_read(#reader{access=Access}, _Fun, _Accu, _Opts) ->
1394    {error, {read_mode_expected, Access}};
1395foldl_read(TarName, Fun, Accu, #read_opts{}=Opts)
1396  when is_function(Fun,4) ->
1397    try open(TarName, [read|Opts#read_opts.open_mode]) of
1398        {ok, #reader{access=read}=Reader} ->
1399            try
1400                foldl_read(Reader, Fun, Accu, Opts)
1401            after
1402                _ = close(Reader)
1403            end;
1404        {error, _} = Err ->
1405            Err
1406    catch
1407        throw:Err ->
1408            Err
1409    end.
1410
1411foldl_read0(Reader, Fun, Accu, Opts) ->
1412    try foldl_read1(Fun, Accu, Reader, Opts, #{}) of
1413        {ok,_,_} = Ok ->
1414            Ok
1415    catch
1416        throw:{error, {Reason, Format, Args}} ->
1417            read_verbose(Opts, Format, Args),
1418            {error, Reason};
1419        throw:Err ->
1420            Err
1421    end.
1422
1423foldl_read1(Fun, Accu0, Reader0, Opts, ExtraHeaders) ->
1424    {ok, Reader1} = skip_unread(Reader0),
1425    case get_header(Reader1) of
1426        eof ->
1427            Fun(eof, Reader1, Opts, Accu0);
1428        {Header, Reader2} ->
1429            case Header#tar_header.typeflag of
1430                ?TYPE_X_HEADER ->
1431                    {ExtraHeaders2, Reader3} = parse_pax(Reader2),
1432                    ExtraHeaders3 = maps:merge(ExtraHeaders, ExtraHeaders2),
1433                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders3);
1434                ?TYPE_GNU_LONGNAME ->
1435                    {RealName, Reader3} = get_real_name(Reader2),
1436                    ExtraHeaders2 = maps:put(?PAX_PATH,
1437                                             parse_string(RealName), ExtraHeaders),
1438                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2);
1439                ?TYPE_GNU_LONGLINK ->
1440                    {RealName, Reader3} = get_real_name(Reader2),
1441                    ExtraHeaders2 = maps:put(?PAX_LINKPATH,
1442                                             parse_string(RealName), ExtraHeaders),
1443                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2);
1444                _ ->
1445                    Header1 = merge_pax(Header, ExtraHeaders),
1446                    {ok, NewAccu, Reader3} = Fun(Header1, Reader2, Opts, Accu0),
1447                    foldl_read1(Fun, NewAccu, Reader3, Opts, #{})
1448            end
1449    end.
1450
1451%% Applies all known PAX attributes to the current tar header
1452-spec merge_pax(tar_header(), #{binary() => binary()}) -> tar_header().
1453merge_pax(Header, ExtraHeaders) when is_map(ExtraHeaders) ->
1454    do_merge_pax(Header, maps:to_list(ExtraHeaders)).
1455
1456do_merge_pax(Header, []) ->
1457    Header;
1458do_merge_pax(Header, [{?PAX_PATH, Path}|Rest]) ->
1459    do_merge_pax(Header#tar_header{name=unicode:characters_to_list(Path)}, Rest);
1460do_merge_pax(Header, [{?PAX_LINKPATH, LinkPath}|Rest]) ->
1461    do_merge_pax(Header#tar_header{linkname=unicode:characters_to_list(LinkPath)}, Rest);
1462do_merge_pax(Header, [{?PAX_GNAME, Gname}|Rest]) ->
1463    do_merge_pax(Header#tar_header{gname=unicode:characters_to_list(Gname)}, Rest);
1464do_merge_pax(Header, [{?PAX_UNAME, Uname}|Rest]) ->
1465    do_merge_pax(Header#tar_header{uname=unicode:characters_to_list(Uname)}, Rest);
1466do_merge_pax(Header, [{?PAX_UID, Uid}|Rest]) ->
1467    Uid2 = binary_to_integer(Uid),
1468    do_merge_pax(Header#tar_header{uid=Uid2}, Rest);
1469do_merge_pax(Header, [{?PAX_GID, Gid}|Rest]) ->
1470    Gid2 = binary_to_integer(Gid),
1471    do_merge_pax(Header#tar_header{gid=Gid2}, Rest);
1472do_merge_pax(Header, [{?PAX_ATIME, Atime}|Rest]) ->
1473    Atime2 = parse_pax_time(Atime),
1474    do_merge_pax(Header#tar_header{atime=Atime2}, Rest);
1475do_merge_pax(Header, [{?PAX_MTIME, Mtime}|Rest]) ->
1476    Mtime2 = parse_pax_time(Mtime),
1477    do_merge_pax(Header#tar_header{mtime=Mtime2}, Rest);
1478do_merge_pax(Header, [{?PAX_CTIME, Ctime}|Rest]) ->
1479    Ctime2 = parse_pax_time(Ctime),
1480    do_merge_pax(Header#tar_header{ctime=Ctime2}, Rest);
1481do_merge_pax(Header, [{?PAX_SIZE, Size}|Rest]) ->
1482    Size2 = binary_to_integer(Size),
1483    do_merge_pax(Header#tar_header{size=Size2}, Rest);
1484do_merge_pax(Header, [{<<?PAX_XATTR_STR, _Key/binary>>, _Value}|Rest]) ->
1485    do_merge_pax(Header, Rest);
1486do_merge_pax(Header, [_Ignore|Rest]) ->
1487    do_merge_pax(Header, Rest).
1488
1489%% Returns the time since UNIX epoch as a datetime
1490-spec parse_pax_time(binary()) -> tar_time().
1491parse_pax_time(Bin) when is_binary(Bin) ->
1492    TotalNano = case binary:split(Bin, [<<$.>>]) of
1493                    [SecondsStr, NanoStr0] ->
1494                        Seconds = binary_to_integer(SecondsStr),
1495                        if byte_size(NanoStr0) < ?MAX_NANO_INT_SIZE ->
1496                                %% right pad
1497                                PaddingN = ?MAX_NANO_INT_SIZE-byte_size(NanoStr0),
1498                                Padding = binary:copy(<<$0>>, PaddingN),
1499                                NanoStr1 = <<NanoStr0/binary,Padding/binary>>,
1500                                Nano = binary_to_integer(NanoStr1),
1501                                (Seconds*?BILLION)+Nano;
1502                           byte_size(NanoStr0) > ?MAX_NANO_INT_SIZE ->
1503                                %% right truncate
1504                                NanoStr1 = binary_part(NanoStr0, 0, ?MAX_NANO_INT_SIZE),
1505                                Nano = binary_to_integer(NanoStr1),
1506                                (Seconds*?BILLION)+Nano;
1507                           true ->
1508                                (Seconds*?BILLION)+binary_to_integer(NanoStr0)
1509                        end;
1510                    [SecondsStr] ->
1511                        binary_to_integer(SecondsStr)*?BILLION
1512                end,
1513    %% truncate to microseconds
1514    Micro = TotalNano div 1000,
1515    Mega = Micro div 1000000000000,
1516    Secs = Micro div 1000000 - (Mega*1000000),
1517    Secs.
1518
1519%% Given a regular file reader, reads the whole file and
1520%% parses all extended attributes it contains.
1521parse_pax(#reg_file_reader{handle=Handle,num_bytes=0}) ->
1522    {#{}, Handle};
1523parse_pax(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
1524    case do_read(Handle0, NumBytes) of
1525        {ok, Bytes, Handle1} ->
1526            do_parse_pax(Handle1, Bytes, #{});
1527        {error, _} = Err ->
1528            throw(Err)
1529    end.
1530
1531do_parse_pax(Reader, <<>>, Headers) ->
1532    {Headers, Reader};
1533do_parse_pax(Reader, Bin, Headers) ->
1534    {Key, Value, Residual} = parse_pax_record(Bin),
1535    NewHeaders = maps:put(Key, Value, Headers),
1536    do_parse_pax(Reader, Residual, NewHeaders).
1537
1538%% Parse an extended attribute
1539parse_pax_record(Bin) when is_binary(Bin) ->
1540    case binary:split(Bin, [<<$\n>>]) of
1541        [Record, Residual] ->
1542            case [X || X <- binary:split(Record, [<<$\s>>], [global]), X =/= <<>>] of
1543                [_Len, Record1] ->
1544                    case  [X || X <- binary:split(Record1, [<<$=>>], [global]), X =/= <<>>] of
1545                        [AttrName, AttrValue] ->
1546                            {AttrName, AttrValue, Residual};
1547                        _Other ->
1548                            throw({error, malformed_pax_record})
1549                    end;
1550                _Other ->
1551                    throw({error, malformed_pax_record})
1552            end;
1553        _Other ->
1554            throw({error, malformed_pax_record})
1555    end.
1556
1557get_real_name(#reg_file_reader{handle=Handle,num_bytes=0}) ->
1558    {"", Handle};
1559get_real_name(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
1560    case do_read(Handle0, NumBytes) of
1561        {ok, RealName, Handle1} ->
1562            {RealName, Handle1};
1563        {error, _} = Err ->
1564            throw(Err)
1565    end;
1566get_real_name(#sparse_file_reader{num_bytes=NumBytes}=Reader0) ->
1567    case do_read(Reader0, NumBytes) of
1568        {ok, RealName, Reader1} ->
1569            {RealName, Reader1};
1570        {error, _} = Err ->
1571            throw(Err)
1572    end.
1573
1574%% Skip the remaining bytes for the current file entry
1575skip_file(#reg_file_reader{handle=Handle0,pos=Pos,size=Size}=Reader) ->
1576    Padding = skip_padding(Size),
1577    AbsPos = Handle0#reader.pos + (Size-Pos) + Padding,
1578    case do_position(Handle0, AbsPos) of
1579        {ok, _, Handle1} ->
1580            Reader#reg_file_reader{handle=Handle1,num_bytes=0,pos=Size};
1581        Err ->
1582            throw(Err)
1583    end;
1584skip_file(#sparse_file_reader{pos=Pos,size=Size}=Reader) ->
1585    case do_read(Reader, Size-Pos) of
1586        {ok, _, Reader2} ->
1587            Reader2;
1588        Err ->
1589            throw(Err)
1590    end.
1591
1592skip_padding(0) ->
1593    0;
1594skip_padding(Size) when (Size rem ?BLOCK_SIZE) =:= 0 ->
1595    0;
1596skip_padding(Size) when Size =< ?BLOCK_SIZE ->
1597    ?BLOCK_SIZE - Size;
1598skip_padding(Size) ->
1599    ?BLOCK_SIZE - (Size rem ?BLOCK_SIZE).
1600
1601skip_unread(#reader{pos=Pos}=Reader0) when (Pos rem ?BLOCK_SIZE) > 0 ->
1602    Padding = skip_padding(Pos + ?BLOCK_SIZE),
1603    AbsPos = Pos + Padding,
1604    case do_position(Reader0, AbsPos) of
1605        {ok, _, Reader1} ->
1606            {ok, Reader1};
1607        Err ->
1608            throw(Err)
1609    end;
1610skip_unread(#reader{}=Reader) ->
1611    {ok, Reader};
1612skip_unread(#reg_file_reader{handle=Handle,num_bytes=0}) ->
1613    skip_unread(Handle);
1614skip_unread(#reg_file_reader{}=Reader) ->
1615    #reg_file_reader{handle=Handle} = skip_file(Reader),
1616    {ok, Handle};
1617skip_unread(#sparse_file_reader{handle=Handle,num_bytes=0}) ->
1618    skip_unread(Handle);
1619skip_unread(#sparse_file_reader{}=Reader) ->
1620    #sparse_file_reader{handle=Handle} = skip_file(Reader),
1621    {ok, Handle}.
1622
1623write_extracted_element(#tar_header{name=Name,typeflag=Type},
1624                        Bin,
1625                        #read_opts{output=memory}=Opts) ->
1626    case typeflag(Type) of
1627        regular ->
1628            read_verbose(Opts, "x ~ts~n", [Name]),
1629            {ok, {Name, Bin}};
1630        _ ->
1631            ok
1632    end;
1633write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) ->
1634    Name1 = make_safe_path(Name0, Opts),
1635    Created =
1636        case typeflag(Header#tar_header.typeflag) of
1637            regular ->
1638                create_regular(Name1, Name0, Bin, Opts);
1639            directory ->
1640                read_verbose(Opts, "x ~ts~n", [Name0]),
1641                create_extracted_dir(Name1, Opts);
1642            symlink ->
1643                read_verbose(Opts, "x ~ts~n", [Name0]),
1644                create_symlink(Name1, Header#tar_header.linkname, Opts);
1645            Device when Device =:= char orelse Device =:= block ->
1646                %% char/block devices will be created as empty files
1647                %% and then have their major/minor device set later
1648                create_regular(Name1, Name0, <<>>, Opts);
1649            fifo ->
1650                %% fifo devices will be created as empty files
1651                create_regular(Name1, Name0, <<>>, Opts);
1652            Other -> % Ignore.
1653                read_verbose(Opts, "x ~ts - unsupported type ~p~n",
1654                             [Name0, Other]),
1655                not_written
1656        end,
1657    case Created of
1658        ok  -> set_extracted_file_info(Name1, Header);
1659        not_written -> ok
1660    end.
1661
1662make_safe_path([$/|Path], Opts) ->
1663    make_safe_path(Path, Opts);
1664make_safe_path(Path, #read_opts{cwd=Cwd}) ->
1665    case safe_relative_path_links(Path, Cwd) of
1666        unsafe ->
1667            throw({error,{Path,unsafe_path}});
1668        SafePath ->
1669            filename:absname(SafePath, Cwd)
1670    end.
1671
1672safe_relative_path_links(Path, Cwd) ->
1673    case filename:pathtype(Path) of
1674        relative -> safe_relative_path_links(filename:split(Path), Cwd, [], "");
1675        _ -> unsafe
1676    end.
1677
1678safe_relative_path_links([], _Cwd, _PrevLinks, Acc) ->
1679    Acc;
1680
1681safe_relative_path_links([Segment | Segments], Cwd, PrevLinks, Acc) ->
1682    AccSegment = join(Acc, Segment),
1683
1684    case mix_hex_filename:safe_relative_path(AccSegment) of
1685        unsafe ->
1686            unsafe;
1687
1688        SafeAccSegment ->
1689            case file:read_link(join(Cwd, SafeAccSegment)) of
1690                {ok, LinkPath} ->
1691                    case lists:member(LinkPath, PrevLinks) of
1692                        true ->
1693                            unsafe;
1694                        false ->
1695                            case safe_relative_path_links(filename:split(LinkPath), Cwd, [LinkPath | PrevLinks], Acc) of
1696                                unsafe -> unsafe;
1697                                NewAcc -> safe_relative_path_links(Segments, Cwd, [], NewAcc)
1698                            end
1699                    end;
1700
1701                {error, _} ->
1702                    safe_relative_path_links(Segments, Cwd, PrevLinks, SafeAccSegment)
1703            end
1704  end.
1705
1706join([], Path) -> Path;
1707join(Left, Right) -> filename:join(Left, Right).
1708
1709create_regular(Name, NameInArchive, Bin, Opts) ->
1710    case write_extracted_file(Name, Bin, Opts) of
1711        not_written ->
1712            read_verbose(Opts, "x ~ts - exists, not created~n", [NameInArchive]),
1713            not_written;
1714        Ok ->
1715            read_verbose(Opts, "x ~ts~n", [NameInArchive]),
1716            Ok
1717    end.
1718
1719create_extracted_dir(Name, _Opts) ->
1720    case file:make_dir(Name) of
1721        ok -> ok;
1722        {error,enotsup} -> not_written;
1723        {error,eexist} -> not_written;
1724        {error,enoent} -> make_dirs(Name, dir);
1725        {error,Reason} -> throw({error, Reason})
1726    end.
1727
1728create_symlink(Name, Linkname, Opts) ->
1729    case file:make_symlink(Linkname, Name) of
1730        ok -> ok;
1731        {error,enoent} ->
1732            ok = make_dirs(Name, file),
1733            create_symlink(Name, Linkname, Opts);
1734        {error,eexist} -> not_written;
1735        {error,enotsup} ->
1736            read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]),
1737            not_written;
1738        {error,Reason} -> throw({error, Reason})
1739    end.
1740
1741write_extracted_file(Name, Bin, Opts) ->
1742    Write =
1743        case Opts#read_opts.keep_old_files of
1744            true ->
1745                case file:read_file_info(Name) of
1746                    {ok, _} -> false;
1747                    _ -> true
1748                end;
1749            false -> true
1750        end,
1751    case Write of
1752        true  -> write_file(Name, Bin);
1753        false -> not_written
1754    end.
1755
1756write_file(Name, Bin) ->
1757    case file:write_file(Name, Bin) of
1758        ok -> ok;
1759        {error,enoent} ->
1760            case make_dirs(Name, file) of
1761                ok ->
1762                    write_file(Name, Bin);
1763                {error,Reason} ->
1764                    throw({error, Reason})
1765            end;
1766        {error,Reason} ->
1767            throw({error, Reason})
1768    end.
1769
1770set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_SYMLINK}) -> ok;
1771set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_LINK})    -> ok;
1772set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_CHAR}=Header) ->
1773    set_device_info(Name, Header);
1774set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_BLOCK}=Header) ->
1775    set_device_info(Name, Header);
1776set_extracted_file_info(Name, #tar_header{mtime=Mtime,mode=Mode}) ->
1777    Info = #file_info{mode=Mode, mtime=Mtime},
1778    file:write_file_info(Name, Info, [{time, posix}]).
1779
1780set_device_info(Name, #tar_header{}=Header) ->
1781    Mtime = Header#tar_header.mtime,
1782    Mode = Header#tar_header.mode,
1783    Devmajor = Header#tar_header.devmajor,
1784    Devminor = Header#tar_header.devminor,
1785    Info = #file_info{
1786              mode=Mode,
1787              mtime=Mtime,
1788              major_device=Devmajor,
1789              minor_device=Devminor
1790             },
1791    file:write_file_info(Name, Info).
1792
1793%% Makes all directories leading up to the file.
1794
1795make_dirs(Name, file) ->
1796    filelib:ensure_dir(Name);
1797make_dirs(Name, dir) ->
1798    filelib:ensure_dir(filename:join(Name,"*")).
1799
1800%% Prints the message on if the verbose option is given (for reading).
1801read_verbose(#read_opts{verbose=true}, Format, Args) ->
1802    io:format(Format, Args);
1803read_verbose(_, _, _) ->
1804    ok.
1805
1806%% Prints the message on if the verbose option is given.
1807add_verbose(#add_opts{verbose=true}, Format, Args) ->
1808    io:format(Format, Args);
1809add_verbose(_, _, _) ->
1810    ok.
1811
1812%%%%%%%%%%%%%%%%%%
1813%% I/O primitives
1814%%%%%%%%%%%%%%%%%%
1815
1816do_write(#reader{handle=Handle,func=Fun}=Reader0, Data)
1817  when is_function(Fun,2) ->
1818    case Fun(write,{Handle,Data}) of
1819        ok ->
1820            {ok, Pos, Reader1} = do_position(Reader0, {cur,0}),
1821            {ok, Reader1#reader{pos=Pos}};
1822        {error, _} = Err ->
1823            Err
1824    end.
1825
1826do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=0}=Opts)
1827  when is_function(Fun, 2) ->
1828    do_copy(Reader, Source, Opts#add_opts{chunk_size=65536});
1829do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=ChunkSize})
1830    when is_function(Fun, 2) ->
1831    case file:open(Source, [read, binary]) of
1832        {ok, SourceFd} ->
1833            case copy_chunked(Reader, SourceFd, ChunkSize, 0) of
1834                {ok, _Copied, _Reader2} = Ok->
1835                    _ = file:close(SourceFd),
1836                    Ok;
1837                Err ->
1838                    _ = file:close(SourceFd),
1839                    throw(Err)
1840            end;
1841        Err ->
1842            throw(Err)
1843    end.
1844
1845copy_chunked(#reader{}=Reader, Source, ChunkSize, Copied) ->
1846    case file:read(Source, ChunkSize) of
1847        {ok, Bin} ->
1848            {ok, Reader2} = do_write(Reader, Bin),
1849            copy_chunked(Reader2, Source, ChunkSize, Copied+byte_size(Bin));
1850        eof ->
1851            {ok, Copied, Reader};
1852        Other ->
1853            Other
1854    end.
1855
1856
1857do_position(#reader{handle=Handle,func=Fun}=Reader, Pos)
1858  when is_function(Fun,2)->
1859    case Fun(position, {Handle,Pos}) of
1860        {ok, NewPos} ->
1861            %% since Pos may not always be an absolute seek,
1862            %% make sure we update the reader with the new absolute position
1863            {ok, AbsPos} = Fun(position, {Handle, {cur, 0}}),
1864            {ok, NewPos, Reader#reader{pos=AbsPos}};
1865        Other ->
1866            Other
1867    end.
1868
1869do_read(#reg_file_reader{handle=Handle,pos=Pos,size=Size}=Reader, Len) ->
1870    NumBytes = Size - Pos,
1871    ActualLen = if NumBytes - Len < 0 -> NumBytes; true -> Len end,
1872    case do_read(Handle, ActualLen) of
1873        {ok, Bin, Handle2} ->
1874            NewPos = Pos + ActualLen,
1875            NumBytes2 = Size - NewPos,
1876            Reader1 = Reader#reg_file_reader{
1877                        handle=Handle2,
1878                        pos=NewPos,
1879                        num_bytes=NumBytes2},
1880            {ok, Bin, Reader1};
1881        Other ->
1882            Other
1883    end;
1884do_read(#sparse_file_reader{}=Reader, Len) ->
1885    do_sparse_read(Reader, Len);
1886do_read(#reader{pos=Pos,handle=Handle,func=Fun}=Reader, Len)
1887  when is_function(Fun,2)->
1888    %% Always convert to binary internally
1889    case Fun(read2,{Handle,Len}) of
1890        {ok, List} when is_list(List) ->
1891            Bin = list_to_binary(List),
1892            NewPos = Pos+byte_size(Bin),
1893            {ok, Bin, Reader#reader{pos=NewPos}};
1894        {ok, Bin} when is_binary(Bin) ->
1895            NewPos = Pos+byte_size(Bin),
1896            {ok, Bin, Reader#reader{pos=NewPos}};
1897        Other ->
1898            Other
1899    end.
1900
1901
1902do_sparse_read(Reader, Len) ->
1903    do_sparse_read(Reader, Len, <<>>).
1904
1905do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{num_bytes=0}|Entries]
1906                                  }=Reader0, Len, Acc) ->
1907    %% skip all empty fragments
1908    Reader1 = Reader0#sparse_file_reader{sparse_map=Entries},
1909    do_sparse_read(Reader1, Len, Acc);
1910do_sparse_read(#sparse_file_reader{sparse_map=[],
1911                                   pos=Pos,size=Size}=Reader0, Len, Acc)
1912  when Pos < Size ->
1913    %% if there are no more fragments, it is possible that there is one last sparse hole
1914    %% this behaviour matches the BSD tar utility
1915    %% however, GNU tar stops returning data even if we haven't reached the end
1916    {ok, Bin, Reader1} = read_sparse_hole(Reader0, Size, Len),
1917    do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
1918do_sparse_read(#sparse_file_reader{sparse_map=[]}=Reader, _Len, Acc) ->
1919    {ok, Acc, Reader};
1920do_sparse_read(#sparse_file_reader{}=Reader, 0, Acc) ->
1921    {ok, Acc, Reader};
1922do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{offset=Offset}|_],
1923                                   pos=Pos}=Reader0, Len, Acc)
1924  when Pos < Offset ->
1925    {ok, Bin, Reader1} = read_sparse_hole(Reader0, Offset, Offset-Pos),
1926    do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
1927do_sparse_read(#sparse_file_reader{sparse_map=[Entry|Entries],
1928                                   pos=Pos}=Reader0, Len, Acc) ->
1929    %% we're in a data fragment, so read from it
1930    %% end offset of fragment
1931    EndPos = Entry#sparse_entry.offset + Entry#sparse_entry.num_bytes,
1932    %% bytes left in fragment
1933    NumBytes = EndPos - Pos,
1934    ActualLen = if Len > NumBytes -> NumBytes; true -> Len end,
1935    case do_read(Reader0#sparse_file_reader.handle, ActualLen) of
1936        {ok, Bin, Handle} ->
1937            BytesRead = byte_size(Bin),
1938            ActualEndPos = Pos+BytesRead,
1939            Reader1 = if ActualEndPos =:= EndPos ->
1940                              Reader0#sparse_file_reader{sparse_map=Entries};
1941                         true ->
1942                              Reader0
1943                      end,
1944            Size = Reader1#sparse_file_reader.size,
1945            NumBytes2 = Size - ActualEndPos,
1946            Reader2 = Reader1#sparse_file_reader{
1947                        handle=Handle,
1948                        pos=ActualEndPos,
1949                        num_bytes=NumBytes2},
1950            do_sparse_read(Reader2, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
1951        Other ->
1952            Other
1953    end.
1954
1955%% Reads a sparse hole ending at Offset
1956read_sparse_hole(#sparse_file_reader{pos=Pos}=Reader, Offset, Len) ->
1957    N = Offset - Pos,
1958    N2 = if N > Len ->
1959                 Len;
1960            true ->
1961                 N
1962         end,
1963    Bin = <<0:N2/unit:8>>,
1964    NumBytes = Reader#sparse_file_reader.size - (Pos+N2),
1965    {ok, Bin, Reader#sparse_file_reader{
1966                num_bytes=NumBytes,
1967                pos=Pos+N2}}.
1968
1969-spec do_close(reader()) -> ok | {error, term()}.
1970do_close(#reader{handle=Handle,func=Fun}) when is_function(Fun,2) ->
1971    Fun(close,Handle).
1972
1973%%%%%%%%%%%%%%%%%%
1974%% Option parsing
1975%%%%%%%%%%%%%%%%%%
1976
1977extract_opts(List) ->
1978    extract_opts(List, default_options()).
1979
1980table_opts(List) ->
1981    read_opts(List, default_options()).
1982
1983default_options() ->
1984    {ok, Cwd} = file:get_cwd(),
1985    #read_opts{cwd=Cwd}.
1986
1987extract_opts([keep_old_files|Rest], Opts) ->
1988    extract_opts(Rest, Opts#read_opts{keep_old_files=true});
1989extract_opts([{cwd, Cwd}|Rest], Opts) ->
1990    extract_opts(Rest, Opts#read_opts{cwd=Cwd});
1991extract_opts([{files, Files}|Rest], Opts) ->
1992    Set = ordsets:from_list(Files),
1993    extract_opts(Rest, Opts#read_opts{files=Set});
1994extract_opts([memory|Rest], Opts) ->
1995    extract_opts(Rest, Opts#read_opts{output=memory});
1996extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
1997    extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
1998extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
1999    extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
2000extract_opts([verbose|Rest], Opts) ->
2001    extract_opts(Rest, Opts#read_opts{verbose=true});
2002extract_opts([Other|Rest], Opts) ->
2003    extract_opts(Rest, read_opts([Other], Opts));
2004extract_opts([], Opts) ->
2005    Opts.
2006
2007read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
2008    read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
2009read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
2010    read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
2011read_opts([verbose|Rest], Opts) ->
2012    read_opts(Rest, Opts#read_opts{verbose=true});
2013read_opts([_|Rest], Opts) ->
2014    read_opts(Rest, Opts);
2015read_opts([], Opts) ->
2016    Opts.
2017