1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 1997-2020. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20%% This module implements extraction/creation of tar archives.
21%% It supports reading most common tar formats, namely V7, STAR,
22%% USTAR, GNU, BSD/libarchive, and PAX. It produces archives in USTAR
23%% format, unless it must use PAX headers, in which case it produces PAX
24%% format.
25%%
%% The following references were used:
27%%   http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
28%%   http://www.gnu.org/software/tar/manual/html_node/Standard.html
29%%   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
30-module(erl_tar).
31
32-export([init/3,
33         create/2, create/3,
34         extract/1, extract/2,
35         table/1, table/2, t/1, tt/1,
36         open/2, close/1,
37         add/3, add/4,
38         format_error/1]).
39
40-include_lib("kernel/include/file.hrl").
41-include_lib("erl_tar.hrl").
42
%% Turns an error reason produced by this module into a descriptive,
%% human-readable string.
%%
%% The specific two-element tuples are matched before the generic
%% {Name, Reason} clause, which formats the nested reason by calling
%% format_error/1 again. Atoms without a clause of their own are handed
%% to file:format_error/1; any remaining term is printed with ~tp.
-spec format_error(term()) -> string().
format_error({bad_header, Why}) ->
    Text = io_lib:format("Unrecognized tar header format: ~p", [Why]),
    lists:flatten(Text);
format_error({invalid_header, negative_size}) ->
    "Invalid header: negative size";
format_error({invalid_sparse_map_entry, Why}) ->
    Text = io_lib:format("Invalid sparse map entry: ~p", [Why]),
    lists:flatten(Text);
format_error({misaligned_read, Offset}) ->
    Text = io_lib:format("Read a block which was misaligned: block_size=~p pos=~p",
                         [?BLOCK_SIZE, Offset]),
    lists:flatten(Text);
format_error({invalid_gnu_0_1_sparsemap, Version}) ->
    Text = io_lib:format("Invalid GNU sparse map (version ~s)", [Version]),
    lists:flatten(Text);
format_error({Name, Reason}) ->
    Text = io_lib:format("~ts: ~ts", [Name, format_error(Reason)]),
    lists:flatten(Text);
format_error(invalid_tar_checksum) ->
    "Checksum failed";
format_error(bad_header) ->
    "Unrecognized tar header format";
format_error(invalid_sparse_header_size) ->
    "Invalid sparse header: negative size";
format_error(invalid_sparse_map_entry) ->
    "Invalid sparse map entry";
format_error(invalid_end_of_archive) ->
    "Invalid end of archive";
format_error(eof) ->
    "Unexpected end of file";
format_error(integer_overflow) ->
    "Failed to parse numeric: integer overflow";
format_error(invalid_gnu_1_0_sparsemap) ->
    "Invalid GNU sparse map (version 1.0)";
format_error(unsafe_path) ->
    "The path points above the current working directory";
format_error(Atom) when is_atom(Atom) ->
    file:format_error(Atom);
format_error(Term) ->
    Text = io_lib:format("~tp", [Term]),
    lists:flatten(Text).
80
%% Creates a tar descriptor on top of a caller-supplied handle and
%% I/O callback.
%%
%% Fun must be a fun of arity 2; anything else yields {error, badarg}.
%% The initial position of the descriptor is obtained by asking the
%% callback for the current position ({cur, 0}).
-spec init(UserData :: user_data(), write | read, file_op()) ->
                  {ok, tar_descriptor()} | {error, badarg}.
init(UserData, AccessMode, Fun) ->
    case is_function(Fun, 2) of
        true ->
            Reader0 = #reader{handle=UserData, access=AccessMode, func=Fun},
            {ok, Pos, Reader1} = do_position(Reader0, {cur, 0}),
            {ok, Reader1#reader{pos=Pos}};
        false ->
            {error, badarg}
    end.
90
91%%%================================================================
%% Extracts every member of the tar archive Name, using no options.
%% Equivalent to extract(Name, []).
-spec extract(Open :: open_type()) -> ok | {error, term()}.
extract(Name) ->
    NoOptions = [],
    extract(Name, NoOptions).
96
%% Extracts (all) files from the tar file Name.
%%
%% Name may be a file name (string or binary), {binary, Bin},
%% {file, Fd} or an already opened reader descriptor.
%%
%% Options accepted:
%%  - cooked: Opens the tar file without mode `raw`
%%  - compressed: Uncompresses the tar file when reading
%%  - memory: Returns the tar contents as a list of tuples {Name, Bin}
%%  - keep_old_files: Extracted files will not overwrite the destination
%%  - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract
%%  - verbose: Prints verbose information about the extraction,
%%  - {cwd, AbsoluteDir}: Sets the current working directory for the extraction
%%
%% Opts must be a list in every clause; any other value fails with
%% function_clause.
-spec extract(Open :: open_type(), [extract_opt()]) ->
                     {ok, [{string(), binary()}]} |
                     {error, term()} |
                     ok.
extract({binary, Bin}, Opts) when is_list(Opts) ->
    do_extract({binary, Bin}, Opts);
extract({file, Fd}, Opts) when is_list(Opts) ->
    do_extract({file, Fd}, Opts);
extract(#reader{}=Reader, Opts) when is_list(Opts) ->
    do_extract(Reader, Opts);
%% The name test is parenthesised on purpose: in a guard `;` binds
%% looser than `,`, so the unparenthesised form only checked Opts when
%% Name was a binary.
extract(Name, Opts)
  when (is_list(Name) orelse is_binary(Name)), is_list(Opts) ->
    do_extract(Name, Opts).
118
%% Parses the extract options and folds extract1/4 over the archive.
%% The accumulator starts as [] when the output option is `memory`
%% (extracted data is collected) and as the atom ok otherwise.
do_extract(Handle, Opts) when is_list(Opts) ->
    ReadOpts = extract_opts(Opts),
    InitialAcc = case ReadOpts of
                     #read_opts{output=memory} -> [];
                     _ -> ok
                 end,
    foldl_read(Handle, fun extract1/4, InitialAcc, ReadOpts).
123
%% Fold callback used by do_extract/2.
%%
%% At eof the accumulated result is finalized: a list accumulator is
%% reversed and wrapped in {ok, List}; the atom leading_slash triggers
%% an informational log message and becomes ok; anything else is
%% returned unchanged.
%% For a header, the member is either read and handed to extract2/4 or
%% skipped, depending on check_extract/2. Read errors are thrown.
extract1(eof, Reader, _Opts, leading_slash) ->
    error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
    {ok, ok, Reader};
extract1(eof, Reader, _Opts, Collected) when is_list(Collected) ->
    {ok, {ok, lists:reverse(Collected)}, Reader};
extract1(eof, Reader, _Opts, Result) ->
    {ok, Result, Reader};
extract1(#tar_header{name=Name,size=Size}=Header, Reader0, Opts, Acc0) ->
    case check_extract(Name, Opts) of
        false ->
            {ok, Acc0, skip_file(Reader0)};
        true ->
            case do_read(Reader0, Size) of
                {error, _} = Err ->
                    throw(Err);
                {ok, Bin, Reader1} ->
                    Acc1 = extract2(Header, Bin, Opts, Acc0),
                    {ok, Acc1, Reader1}
            end
    end.
144
%% Writes one extracted member and computes the next accumulator.
%%
%% When the element was written (result ok), the accumulator becomes
%% leading_slash if the member name starts with "/", otherwise it is
%% kept. When the writer returns {ok, NameBin} and the accumulator is a
%% list, NameBin is prepended. Write errors are thrown.
extract2(Header, Bin, Opts, Acc) ->
    case write_extracted_element(Header, Bin, Opts) of
        {error, _} = Err ->
            throw(Err);
        {ok, NameBin} when is_list(Acc) ->
            [NameBin | Acc];
        ok ->
            case Header of
                #tar_header{name=[$/ | _]} -> leading_slash;
                #tar_header{} -> Acc
            end
    end.
159
%% Decides whether the member Name should be extracted: always when the
%% `files` option is the atom all, otherwise only when Name is an
%% element of the ordset of requested files.
check_extract(Name, #read_opts{files=Wanted}) ->
    Wanted =:= all orelse ordsets:is_element(Name, Wanted).
165
166%%%================================================================
167%% The following table functions produce a list of information about
168%% the files contained in the archive.
%% Symbolic file type of an archive member as shown in verbose listings.
%% NOTE(review): typeflag/1 in this module never returns `reserved`,
%% although the type allows it.
-type typeflag() :: regular | link | symlink |
                    char | block | directory |
                    fifo | reserved | unknown.
%% Numeric attributes copied from a member's tar header.
-type mode() :: non_neg_integer().
-type uid() :: non_neg_integer().
-type gid() :: non_neg_integer().

%% One element of the list returned by table/2 when the `verbose`
%% option is given (see table1_attrs/2 for how it is built).
-type tar_entry() :: {Name :: name_in_archive(),
                      Type :: typeflag(),
                      Size :: non_neg_integer(),
                      MTime :: tar_time(),
                      Mode :: mode(),
                      Uid :: uid(),
                      Gid :: gid()}.
183
%% Lists the names of the members of the tar archive Name.
%% Equivalent to table(Name, []).
-spec table(Open :: open_type()) -> {ok, [name_in_archive()]} | {error, term()}.
table(Name) ->
    NoOptions = [],
    table(Name, NoOptions).
188
%% Lists the members of the tar archive Name.
%% Options accepted: compressed, verbose, cooked.
%% With `verbose` each element is a tar_entry() tuple, otherwise it is
%% just the member name.
-spec table(Open :: open_type(), [compressed | verbose | cooked]) ->
                   {ok, [name_in_archive() | tar_entry()]} | {error, term()}.
table(Name, Opts) when is_list(Opts) ->
    ReadOpts = table_opts(Opts),
    foldl_read(Name, fun table1/4, [], ReadOpts).
195
%% Fold callback used by table/2: collects one entry per header and
%% skips over the member's data. At eof the collected entries are
%% returned in archive order.
table1(#tar_header{}=Header, Reader0, #read_opts{verbose=Verbose}, Entries) ->
    Entry = table1_attrs(Header, Verbose),
    Reader1 = skip_file(Reader0),
    {ok, [Entry | Entries], Reader1};
table1(eof, Reader, _Opts, Entries) ->
    {ok, {ok, lists:reverse(Entries)}, Reader}.
202
%% Builds the listing entry for one header: the full tar_entry() tuple
%% when Verbose is true, otherwise only the member name.
table1_attrs(#tar_header{name=Name,
                         typeflag=Typeflag,
                         size=Size,
                         mtime=Mtime,
                         mode=Mode,
                         uid=Uid,
                         gid=Gid}, true) ->
    {Name, typeflag(Typeflag), Size, Mtime, Mode, Uid, Gid};
table1_attrs(#tar_header{name=Name}, _Verbose) ->
    Name.
214
%% Maps a raw header typeflag to its symbolic file type. Several flags
%% (regular, old-style regular, GNU sparse and contiguous) are all
%% reported as regular; unrecognized flags are reported as unknown.
typeflag(Flag) when Flag =:= ?TYPE_REGULAR;
                    Flag =:= ?TYPE_REGULAR_A;
                    Flag =:= ?TYPE_GNU_SPARSE;
                    Flag =:= ?TYPE_CONT ->
    regular;
typeflag(Flag) when Flag =:= ?TYPE_LINK -> link;
typeflag(Flag) when Flag =:= ?TYPE_SYMLINK -> symlink;
typeflag(Flag) when Flag =:= ?TYPE_CHAR -> char;
typeflag(Flag) when Flag =:= ?TYPE_BLOCK -> block;
typeflag(Flag) when Flag =:= ?TYPE_DIR -> directory;
typeflag(Flag) when Flag =:= ?TYPE_FIFO -> fifo;
typeflag(_Other) -> unknown.
226
227%%%================================================================
%% Commands for printing the contents of a tape archive,
229%% meant to be invoked from the shell.
230
%% Prints the name of every member of the archive, one per line.
%% Returns ok, or the error returned by table/1.
-spec t(file:filename()) -> ok | {error, term()}.
t(Name) when is_list(Name); is_binary(Name) ->
    PrintName = fun(Member) -> ok = io:format("~ts\n", [Member]) end,
    case table(Name) of
        {ok, Members} ->
            lists:foreach(PrintName, Members);
        Failure ->
            Failure
    end.
240
%% Prints one verbose line (type, mode, owner, size, time, name) for
%% every member of the archive. Returns ok, or the error returned by
%% table/2.
-spec tt(open_type()) -> ok | {error, term()}.
tt(Name) ->
    Listing = table(Name, [verbose]),
    case Listing of
        {ok, Entries} ->
            lists:foreach(fun print_header/1, Entries);
        Failure ->
            Failure
    end.
250
%% Used by tt/1 to print a tar_entry tuple as a single line:
%% type and permission characters, uid/gid, size, modification time and
%% member name.
%%
%% The name is printed with ~ts (as t/1 does) so that member names
%% containing code points above 255 are printed instead of making
%% io:format/2 fail with badarg.
-spec print_header(tar_entry()) -> ok.
print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) ->
    io:format("~s~s ~4w/~-4w ~7w ~s ~ts\n",
              [type_to_string(Type), mode_to_string(Mode),
               Uid, Gid, Size, time_to_string(Mtime), Name]).
257
%% One-character tag used by print_header/1 for each symbolic file type.
type_to_string(block)     -> [$b];
type_to_string(char)      -> [$c];
type_to_string(directory) -> [$d];
type_to_string(fifo)      -> [$f];
type_to_string(link)      -> [$l];
type_to_string(regular)   -> [$-];
type_to_string(symlink)   -> [$s];
type_to_string(unknown)   -> [$?].
266
%% Renders the nine low permission bits of Mode as an "rwxrwxrwx"
%% style string, with $- for every bit that is not set.
mode_to_string(Mode) ->
    %% The characters are listed from the least significant bit upwards;
    %% prepending to the accumulator puts them back in display order.
    mode_to_string(Mode, "xwrxwrxwr", []).

mode_to_string(_Mode, [], Acc) ->
    Acc;
mode_to_string(Mode, [Char | Chars], Acc) ->
    Shown = case Mode band 1 of
                1 -> Char;
                _ -> $-
            end,
    mode_to_string(Mode bsr 1, Chars, [Shown | Acc]).
276
%% Formats a tar_time() (seconds relative to ?EPOCH) as local time in
%% the form "Mon DD HH:MM YYYY". The result is io_lib chardata, not a
%% flat string.
time_to_string(Secs0) ->
    GregorianSecs = calendar:datetime_to_gregorian_seconds(?EPOCH) + Secs0,
    Universal = calendar:gregorian_seconds_to_datetime(GregorianSecs),
    {{Year, Mon, Day}, {Hour, Min, _Sec}} =
        calendar:universal_time_to_local_time(Universal),
    io_lib:format("~s ~2w ~s:~s ~w",
                  [month(Mon), Day, two_d(Hour), two_d(Min), Year]).
285
%% Zero-pads N (0..99) to two digits by formatting N + 100 and dropping
%% the first character.
two_d(N) ->
    [_First | Digits] = integer_to_list(N + 100),
    Digits.
288
%% Three-letter English abbreviation for month number 1..12.
%% Any other argument fails with function_clause.
month(N) when is_integer(N), N >= 1, N =< 12 ->
    Names = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"},
    element(N, Names).
301
302%%%================================================================
%% open/2 and its helpers keep the file-based and binary-based APIs of
%% this module behind one entry point.
%%
%% An archive can be given as a file name, as {binary, Bin} holding the
%% archive data, or as {file, Fd} for an already opened file.
-type open_type() :: file:filename_all()
                     | {binary, binary()}
                     | {file, file:io_device()}.
-spec open(Open :: open_type(), [write | compressed | cooked]) ->
                  {ok, tar_descriptor()} | {error, term()}.
open({binary, Bin}=Handle, Mode) when is_binary(Bin) ->
    do_open(Handle, Mode);
open({file, _Fd}=Handle, Mode) ->
    do_open(Handle, Mode);
open(Name, Mode) when is_list(Name); is_binary(Name) ->
    do_open(Name, Mode).
315
%% Parses the mode list and opens the archive. An invalid mode list is
%% reported as {error, {Name, Reason}}.
do_open(Name, Mode) when is_list(Mode) ->
    case open_mode(Mode) of
        {error, Reason} ->
            {error, {Name, Reason}};
        {ok, Access, Raw, Opts} ->
            open1(Name, Access, Raw, Opts)
    end.
323
%% Opens the archive according to the parsed mode.
%%
%%  - {binary, Bin}, read: the data is opened as a ram file. With the
%%    compressed option gunzip is attempted first; if it fails the data
%%    is used as it is.
%%  - {file, Fd}, read, raw: wraps the given descriptor; the compressed
%%    option is rejected.
%%  - {file, Fd}, read, cooked: rejected.
%%  - file name: opened with file:open/2 using the collected options.
%% Errors are returned as {error, {Handle, Reason}}.
open1({binary, Bin0}=Handle, read, _Raw, Opts) when is_binary(Bin0) ->
    Compressed = lists:member(compressed, Opts),
    Bin = if
              Compressed ->
                  %% Best effort: data that is not gzip is read as-is.
                  try zlib:gunzip(Bin0)
                  catch _:_ -> Bin0
                  end;
              true ->
                  Bin0
          end,
    case file:open(Bin, [ram,binary,read]) of
        {error, Reason} ->
            {error, {Handle, Reason}};
        {ok, File} ->
            {ok, #reader{handle=File, access=read, func=fun file_op/2}}
    end;
open1({file, Fd}=Handle, read, [raw], Opts) ->
    case lists:member(compressed, Opts) of
        true ->
            {error, {Handle, {incompatible_option, compressed}}};
        false ->
            Reader0 = #reader{handle=Fd, access=read, func=fun file_op/2},
            case do_position(Reader0, {cur, 0}) of
                {error, Reason} ->
                    {error, {Handle, Reason}};
                {ok, Pos, Reader1} ->
                    {ok, Reader1#reader{pos=Pos}}
            end
    end;
open1({file, _Fd}=Handle, read, [], _Opts) ->
    {error, {Handle, {incompatible_option, cooked}}};
open1(Name, Access, Raw, Opts) when is_list(Name); is_binary(Name) ->
    FileOpts = Raw ++ [binary, Access | Opts],
    case file:open(Name, FileOpts) of
        {error, Reason} ->
            {error, {Name, Reason}};
        {ok, File} ->
            {ok, #reader{handle=File, access=Access, func=fun file_op/2}}
    end.
364
%% Parses the mode given to open/2 into {ok, Access, Raw, Opts}.
%%
%% Access starts as false and may be set once, by read or write; a
%% second access flag or any unknown flag gives {error, einval}.
%% Raw starts as [raw] and becomes [] when cooked is given.
%% compressed adds compressed and read_ahead to Opts.
%% The bare atoms read and write are accepted in place of a list.
open_mode(Mode) ->
    open_mode(Mode, false, [raw], []).

open_mode(read, _Access, Raw, _Opts) ->
    {ok, read, Raw, []};
open_mode(write, _Access, Raw, _Opts) ->
    {ok, write, Raw, []};
open_mode([], Access, Raw, Opts) ->
    {ok, Access, Raw, Opts};
open_mode([Flag | Rest], Access, Raw, Opts) ->
    case {Flag, Access} of
        {read, false} ->
            open_mode(Rest, read, Raw, Opts);
        {write, false} ->
            open_mode(Rest, write, Raw, Opts);
        {compressed, _} ->
            open_mode(Rest, Access, Raw, [compressed, read_ahead | Opts]);
        {cooked, _} ->
            open_mode(Rest, Access, [], Opts);
        _ ->
            {error, einval}
    end;
open_mode(_, _, _, _) ->
    {error, einval}.
384
%% Default I/O callback: maps the reader operations onto the `file`
%% module. The second argument is {Fd, Arg} for every operation except
%% close, which takes the bare Fd.
file_op(read2, {Fd, Size}) ->
    file:read(Fd, Size);
file_op(position, {Fd, Pos}) ->
    file:position(Fd, Pos);
file_op(write, {Fd, Data}) ->
    file:write(Fd, Data);
file_op(close, Fd) ->
    file:close(Fd).
393
%% Closes a tar archive.
%% An archive opened for writing is padded (see pad_file/1) before the
%% underlying handle is closed. Anything that is not a reader opened
%% for read or write gives {error, einval}. A failing pad or close is
%% not returned but crashes with badmatch.
-spec close(TarDescriptor :: tar_descriptor()) -> ok | {error, term()}.
close(#reader{access=write}=Writer) ->
    {ok, Padded} = pad_file(Writer),
    ok = do_close(Padded);
close(#reader{access=read}=Reader) ->
    ok = do_close(Reader),
    ok;
close(_Other) ->
    {error, einval}.
404
%% Writes the end-of-archive trailer: zero bytes as computed by
%% skip_padding/1 for the current position, followed by the two zero
%% blocks that must terminate an archive.
pad_file(#reader{pos=Pos}=Reader) ->
    PadBytes = skip_padding(Pos + ?BLOCK_SIZE),
    Trailer = [<<0:PadBytes/unit:8>>, ?ZERO_BLOCK, ?ZERO_BLOCK],
    do_write(Reader, Trailer).
410
411
412%%%================================================================
413%% Creation/modification of tar archives
414
%% Creates the tar file Name containing the given files, using no
%% options. Equivalent to create(Name, FileList, []).
-spec create(file:filename_all(), filelist()) -> ok | {error, {string(), term()}}.
create(Name, FileList) when is_list(Name); is_binary(Name) ->
    NoOptions = [],
    create(Name, FileList, NoOptions).
419
%% Creates a tar archive Name containing the given files.
%% Accepted options: verbose, compressed, cooked
%% Only compressed and cooked are passed on to open/2; the complete
%% option list is passed on when the files are added.
-spec create(file:filename_all(), filelist(), [create_opt()]) ->
                    ok | {error, term()} | {error, {string(), term()}}.
create(Name, FileList, Options) when is_list(Name); is_binary(Name) ->
    IsOpenOpt = fun(Opt) -> Opt =:= compressed orelse Opt =:= cooked end,
    OpenOpts = [write | lists:filter(IsOpenOpt, Options)],
    case open(Name, OpenOpts) of
        {error, _} = Err ->
            Err;
        {ok, TarFile} ->
            do_create(TarFile, FileList, Options)
    end.
433
%% Adds every entry of the file list to the archive and closes it.
%% An entry is either {NameInArchive, NameOrBin} or a plain name that is
%% used both as the source and as the name in the archive.
%% On the first error the archive is closed and the error is returned.
do_create(TarFile, [], _Opts) ->
    close(TarFile);
do_create(TarFile, [Entry | Rest], Opts) ->
    {NameInArchive, Source} =
        case Entry of
            {InArchive, NameOrBin} -> {InArchive, NameOrBin};
            Name -> {Name, Name}
        end,
    case add(TarFile, Source, NameInArchive, Opts) of
        {error, _} = Err ->
            _ = close(TarFile),
            Err;
        ok ->
            do_create(TarFile, Rest, Opts)
    end.
452
%% Adds a file to a tape archive.
%% The entry is either a file name, which is also used as the name in
%% the archive, or {NameInArchive, NameOrBin} where NameOrBin is a file
%% name (string) or the file contents (binary).
-type add_type() :: name_in_archive()
                  | {name_in_archive(), file:filename_all()}.
-spec add(TarDescriptor, AddType, Options) -> ok | {error, term()} when
    TarDescriptor :: tar_descriptor(),
    AddType :: add_type(),
    Options :: [add_opt()].
add(Reader, {NameInArchive, NameOrBin}, Opts)
  when is_list(NameInArchive),
       (is_list(NameOrBin) orelse is_binary(NameOrBin)) ->
    do_add(Reader, NameOrBin, NameInArchive, Opts);
add(Reader, Name, Opts) when is_list(Name) ->
    do_add(Reader, Name, Name, Opts).
468
%% Adds a file (given by name) or data (given as a binary) to the
%% archive under the name NameInArchive.
%%
%% Note the asymmetry of the guards: a list source is accepted without
%% further checks here, while a binary source also requires
%% NameInArchive and Options to be lists. do_add/4 validates the
%% remaining arguments.
-spec add(TarDescriptor, Filename, NameInArchive, Options) ->
        ok | {error, term()} when
    TarDescriptor :: tar_descriptor(),
    Filename :: file:filename_all(),
    NameInArchive :: name_in_archive(),
    Options :: [add_opt()].
add(Reader, Name, NameInArchive, Options) when is_list(Name) ->
    do_add(Reader, Name, NameInArchive, Options);
add(Reader, Bin, NameInArchive, Options)
  when is_binary(Bin), is_list(NameInArchive), is_list(Options) ->
    do_add(Reader, Bin, NameInArchive, Options).
479
%% Validates the descriptor and arguments, builds the #add_opts{} record
%% and adds the entry.
%% By default file information is read with read_link_info (add_opts/3
%% switches to read_file_info for the dereference option).
%% A descriptor opened for reading gives {error, eacces}; any other
%% invalid combination gives {error, {badarg, Reader}}.
do_add(#reader{access=write}=Writer, Source, NameInArchive, Options)
  when is_list(NameInArchive), is_list(Options) ->
    ReadInfo = apply_file_info_opts_fun(Options, read_link_info),
    Defaults = #add_opts{read_info=ReadInfo},
    AddOpts = add_opts(Options, Options, Defaults),
    add1(Writer, Source, NameInArchive, AddOpts);
do_add(#reader{access=read}, _Source, _NameInArchive, _Options) ->
    {error, eacces};
do_add(Other, _Source, _NameInArchive, _Options) ->
    {error, {badarg, Other}}.
489
%% Folds the user supplied options into an #add_opts{} record.
%% AllOptions is the complete option list; it is needed to rebuild the
%% file-info fun when dereference is given. Unknown options are ignored.
add_opts([], _AllOptions, Acc) ->
    Acc;
add_opts([Opt | Rest], AllOptions, Acc0) ->
    Acc = case Opt of
              dereference ->
                  RF = apply_file_info_opts_fun(AllOptions, read_file_info),
                  Acc0#add_opts{read_info=RF};
              verbose ->
                  Acc0#add_opts{verbose=true};
              {chunks, N} ->
                  Acc0#add_opts{chunk_size=N};
              {atime, Value} ->
                  Acc0#add_opts{atime=Value};
              {mtime, Value} ->
                  Acc0#add_opts{mtime=Value};
              {ctime, Value} ->
                  Acc0#add_opts{ctime=Value};
              {uid, Value} ->
                  Acc0#add_opts{uid=Value};
              {gid, Value} ->
                  Acc0#add_opts{gid=Value};
              _ ->
                  Acc0
          end,
    add_opts(Rest, AllOptions, Acc).
511
%% Applies the time/owner overrides in Opts to a successful file-info
%% result; any other result (such as an error) is passed through.
apply_file_info_opts(Opts, Result) ->
    case Result of
        {ok, FileInfo} ->
            {ok, do_apply_file_info_opts(Opts, FileInfo)};
        Other ->
            Other
    end.
516
%% Overrides atime, mtime, ctime, uid and gid of a #file_info{} with the
%% values given in the option list; all other options are ignored.
do_apply_file_info_opts([], FileInfo) ->
    FileInfo;
do_apply_file_info_opts([Opt | Rest], FileInfo0) ->
    FileInfo = case Opt of
                   {atime, Value} -> FileInfo0#file_info{atime=Value};
                   {mtime, Value} -> FileInfo0#file_info{mtime=Value};
                   {ctime, Value} -> FileInfo0#file_info{ctime=Value};
                   {uid, Value}   -> FileInfo0#file_info{uid=Value};
                   {gid, Value}   -> FileInfo0#file_info{gid=Value};
                   _              -> FileInfo0
               end,
    do_apply_file_info_opts(Rest, FileInfo).
531
%% Returns a fun that reads the file information of a path using
%% file:InfoFunction/2 with posix timestamps and then applies the
%% overrides found in Options.
apply_file_info_opts_fun(Options, InfoFunction) ->
    fun(Path) ->
        Info = file:InfoFunction(Path, [{time, posix}]),
        apply_file_info_opts(Options, Info)
    end.
536
%% Adds one entry to the archive.
%%
%% First clause: Name is the name of a file on disk. Its file info is
%% read with the fun stored in #add_opts.read_info and the entry is
%% written according to the file type:
%%  - symlink: a header carrying the link target;
%%  - regular: a header, the file contents and zero padding;
%%  - directory: handled by add_directory/5;
%%  - anything else: a header only.
%% Failure to read the file info or the link target is returned as
%% {error, {Name, Reason}}.
%%
%% Second clause: the contents are given as a binary. Time stamps that
%% were not given as options default to the current system time.
%%
%% Returns ok | {error, Reason}.
add1(#reader{}=Reader, Name, NameInArchive, #add_opts{read_info=ReadInfo}=Opts)
  when is_list(Name) ->
    Res = case ReadInfo(Name) of
              {error, Reason0} ->
                  {error, {Name, Reason0}};
              {ok, #file_info{type=symlink}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  %% The link may have disappeared or become unreadable
                  %% since the file info was read; report that as an
                  %% error instead of crashing with badmatch, so that
                  %% callers get a chance to close the archive.
                  case file:read_link(Name) of
                      {ok, Linkname} ->
                          Header = fileinfo_to_header(NameInArchive, Fi, Linkname),
                          add_header(Reader, Header, Opts);
                      {error, Reason1} ->
                          {error, {Name, Reason1}}
                  end;
              {ok, #file_info{type=regular}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  Header = fileinfo_to_header(NameInArchive, Fi, false),
                  {ok, Reader2} = add_header(Reader, Header, Opts),
                  FileSize = Header#tar_header.size,
                  %% Asserts that exactly FileSize bytes were copied.
                  {ok, FileSize, Reader3} = do_copy(Reader2, Name, Opts),
                  Padding = skip_padding(FileSize),
                  Pad = <<0:Padding/unit:8>>,
                  do_write(Reader3, Pad);
              {ok, #file_info{type=directory}=Fi} ->
                  add_directory(Reader, Name, NameInArchive, Fi, Opts);
              {ok, #file_info{}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  Header = fileinfo_to_header(NameInArchive, Fi, false),
                  add_header(Reader, Header, Opts)
          end,
    case Res of
        ok -> ok;
        {ok, _Reader} -> ok;
        {error, _Reason} = Err -> Err
    end;
add1(Reader, Bin, NameInArchive, Opts) when is_binary(Bin) ->
    add_verbose(Opts, "a ~ts~n", [NameInArchive]),
    %% `second` is the current name of the time unit; `seconds` is the
    %% deprecated spelling.
    Now = os:system_time(second),
    Header = #tar_header{
                name = NameInArchive,
                size = byte_size(Bin),
                typeflag = ?TYPE_REGULAR,
                atime = add_opts_time(Opts#add_opts.atime, Now),
                mtime = add_opts_time(Opts#add_opts.mtime, Now),
                ctime = add_opts_time(Opts#add_opts.ctime, Now),
                uid = Opts#add_opts.uid,
                gid = Opts#add_opts.gid,
                mode = 8#100644},
    {ok, Reader2} = add_header(Reader, Header, Opts),
    Padding = skip_padding(byte_size(Bin)),
    Data = [Bin, <<0:Padding/unit:8>>],
    case do_write(Reader2, Data) of
        {ok, _Reader3} -> ok;
        {error, Reason} -> {error, {NameInArchive, Reason}}
    end.
588
%% Picks the time stamp for an entry: the explicitly given time, or Now
%% when none was given (undefined).
add_opts_time(Time, Now) ->
    case Time of
        undefined -> Now;
        _ -> Time
    end.
591
%% Adds a directory to the archive.
%% An empty directory is written as a single header. For a non-empty
%% directory only its entries are added (via add_files/5); errors thrown
%% while doing so are converted to error tuples, tagged with DirName
%% when the thrown reason carries no name of its own.
add_directory(Reader, DirName, NameInArchive, Info, Opts) ->
    case file:list_dir(DirName) of
        {error, Reason} ->
            {error, {DirName, Reason}};
        {ok, Entries} ->
            add_verbose(Opts, "a ~ts~n", [NameInArchive]),
            case Entries of
                [] ->
                    Header = fileinfo_to_header(NameInArchive, Info, false),
                    add_header(Reader, Header, Opts);
                _ ->
                    try add_files(Reader, Entries, DirName, NameInArchive, Opts) of
                        ok -> ok;
                        {error, _} = Err -> Err
                    catch
                        throw:{error, {_Name, _Reason}} = Err -> Err;
                        throw:{error, Reason} -> {error, {DirName, Reason}}
                    end
            end
    end.
610
%% Adds the entries Names of the directory Dir to the archive, below
%% DirInArchive. Each entry is handled according to its file type in
%% the same way as add1/4 does for a single file. Processing stops at
%% the first error.
%%
%% Failure to read the file info or the target of a symbolic link is
%% returned as {error, {FullName, Reason}}.
%%
%% Returns ok | {error, Reason}.
add_files(_Reader, [], _Dir, _DirInArchive, _Opts) ->
    ok;
add_files(Reader, [Name|Rest], Dir, DirInArchive, #add_opts{read_info=Info}=Opts) ->
    FullName = filename:join(Dir, Name),
    NameInArchive = filename:join(DirInArchive, Name),
    Res = case Info(FullName) of
              {error, Reason} ->
                  {error, {FullName, Reason}};
              {ok, #file_info{type=directory}=Fi} ->
                  add_directory(Reader, FullName, NameInArchive, Fi, Opts);
              {ok, #file_info{type=symlink}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  %% The link may have disappeared or become unreadable
                  %% since the file info was read; report that as an
                  %% error instead of crashing with badmatch.
                  case file:read_link(FullName) of
                      {ok, Linkname} ->
                          Header = fileinfo_to_header(NameInArchive, Fi, Linkname),
                          add_header(Reader, Header, Opts);
                      {error, LinkReason} ->
                          {error, {FullName, LinkReason}}
                  end;
              {ok, #file_info{type=regular}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  Header = fileinfo_to_header(NameInArchive, Fi, false),
                  {ok, Reader2} = add_header(Reader, Header, Opts),
                  FileSize = Header#tar_header.size,
                  %% Asserts that exactly FileSize bytes were copied.
                  {ok, FileSize, Reader3} = do_copy(Reader2, FullName, Opts),
                  Padding = skip_padding(FileSize),
                  Pad = <<0:Padding/unit:8>>,
                  do_write(Reader3, Pad);
              {ok, #file_info{}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  Header = fileinfo_to_header(NameInArchive, Fi, false),
                  add_header(Reader, Header, Opts)
          end,
    case Res of
        ok -> add_files(Reader, Rest, Dir, DirInArchive, Opts);
        {ok, ReaderNext} -> add_files(ReaderNext, Rest, Dir, DirInArchive, Opts);
        {error, _} = Err -> Err
    end.
645
%% Converts String for a header field of Size bytes.
%% Throws {error, {write_string, field_too_long}} when the string has
%% more than Size characters. A converted value shorter than the field
%% gets a terminating 0 appended.
format_string(String, Size) when length(String) > Size ->
    throw({error, {write_string, field_too_long}});
format_string(String, Size) ->
    case to_ascii(String) of
        Ascii when byte_size(Ascii) < Size -> [Ascii, 0];
        Ascii -> Ascii
    end.
655
%% Returns the octal (base 8) text representation of an integer as a
%% binary.
format_octal(Octal) ->
    integer_to_binary(Octal, 8).
658
%% Serializes Header (including a preceding PAX entry when one is
%% needed) and writes it to the archive. Returns the result of
%% do_write/2.
add_header(#reader{}=Reader, #tar_header{}=Header, Opts) ->
    {ok, HeaderData} = build_header(Header, Opts),
    do_write(Reader, HeaderData).
662
%% Overwrites part of Block with the given data, starting at byte
%% offset Start. The size of the block is unchanged; the data must fit
%% inside the block, otherwise the match fails with badmatch.
write_to_block(Block, IoData, Start) when is_list(IoData) ->
    write_to_block(Block, iolist_to_binary(IoData), Start);
write_to_block(Block, Bin, Start) when is_binary(Bin) ->
    Len = byte_size(Bin),
    <<Before:Start/binary, _Replaced:Len/binary, After/binary>> = Block,
    <<Before/binary, Bin/binary, After/binary>>.
669
%% Serializes a #tar_header{} into iodata.
%%
%% The header fields are written one by one into a zeroed block. The
%% string and numeric writers also thread a map of PAX attributes
%% through the calls.
%% NOTE(review): presumably a writer adds an attribute to that map when
%% the value cannot be stored in the fixed-width field -- confirm in
%% write_string/6 and write_numeric/6.
%% If any PAX attributes remain after set_path/2, a PAX entry built by
%% build_pax_entry/3 is placed in front of the header block.
%%
%% Returns {ok, [PaxEntry, HeaderBlock]} where PaxEntry is [] when no
%% PAX attributes are needed.
build_header(#tar_header{}=Header, Opts) ->
    #tar_header{
       name=Name,
       mode=Mode,
       uid=Uid,
       gid=Gid,
       size=Size,
       typeflag=Type,
       linkname=Linkname,
       uname=Uname,
       gname=Gname,
       devmajor=Devmaj,
       devminor=Devmin
      } = Header,
    Mtime = Header#tar_header.mtime,

    %% Start from an all-zero block and fill in the fields in turn.
    Block0 = ?ZERO_BLOCK,
    {Block1, Pax0} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, Name, ?PAX_PATH, #{}),
    Block2 = write_octal(Block1, ?V7_MODE, ?V7_MODE_LEN, Mode),
    {Block3, Pax1} = write_numeric(Block2, ?V7_UID, ?V7_UID_LEN, Uid, ?PAX_UID, Pax0),
    {Block4, Pax2} = write_numeric(Block3, ?V7_GID, ?V7_GID_LEN, Gid, ?PAX_GID, Pax1),
    {Block5, Pax3} = write_numeric(Block4, ?V7_SIZE, ?V7_SIZE_LEN, Size, ?PAX_SIZE, Pax2),
    {Block6, Pax4} = write_numeric(Block5, ?V7_MTIME, ?V7_MTIME_LEN, Mtime, ?PAX_NONE, Pax3),
    {Block7, Pax5} = write_string(Block6, ?V7_TYPE, ?V7_TYPE_LEN, <<Type>>, ?PAX_NONE, Pax4),
    {Block8, Pax6} = write_string(Block7, ?V7_LINKNAME, ?V7_LINKNAME_LEN,
                                  Linkname, ?PAX_LINKPATH, Pax5),
    {Block9, Pax7} = write_string(Block8, ?USTAR_UNAME, ?USTAR_UNAME_LEN,
                                  Uname, ?PAX_UNAME, Pax6),
    {Block10, Pax8} = write_string(Block9, ?USTAR_GNAME, ?USTAR_GNAME_LEN,
                                   Gname, ?PAX_GNAME, Pax7),
    {Block11, Pax9} = write_numeric(Block10, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN,
                                    Devmaj, ?PAX_NONE, Pax8),
    {Block12, Pax10} = write_numeric(Block11, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN,
                                     Devmin, ?PAX_NONE, Pax9),
    %% set_path/2 drops the PAX path attribute when the name can be
    %% stored as a ustar name/prefix pair instead.
    {Block13, Pax11} = set_path(Block12, Pax10),
    %% A PAX entry is only emitted when attributes are left.
    PaxEntry = case maps:size(Pax11) of
                   0 -> [];
                   _ -> build_pax_entry(Header, Pax11, Opts)
               end,
    Block14 = set_format(Block13, ?FORMAT_USTAR),
    %% The checksum is written last, after all other fields are in place.
    Block15 = set_checksum(Block14),
    {ok, [PaxEntry, Block15]}.
712
%% Tries to store a path recorded as PAX attribute in the ustar
%% name/prefix fields instead (the ustar prefix is only used when the
%% name is too long).
%% When split_ustar_path/1 succeeds, both fields are written and the
%% path attribute is removed from the map; otherwise block and map are
%% returned unchanged.
set_path(Block0, Pax) ->
    case maps:get(?PAX_PATH, Pax, nil) of
        nil ->
            {Block0, Pax};
        LongPath ->
            case split_ustar_path(LongPath) of
                false ->
                    {Block0, Pax};
                {ok, UstarName, UstarPrefix} ->
                    {WithName, _} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN,
                                                 UstarName, ?PAX_NONE, #{}),
                    {WithPrefix, _} = write_string(WithName, ?USTAR_PREFIX,
                                                   ?USTAR_PREFIX_LEN,
                                                   UstarPrefix, ?PAX_NONE, #{}),
                    {WithPrefix, maps:remove(?PAX_PATH, Pax)}
            end
    end.
730
%% Writes the ustar magic and version fields into the block. Only the
%% USTAR and PAX formats are accepted; any other format is thrown as
%% {error, {invalid_format, Format}}.
set_format(Block, Format)
  when Format =:= ?FORMAT_USTAR; Format =:= ?FORMAT_PAX ->
    WithMagic = write_to_block(Block, ?MAGIC_USTAR, ?USTAR_MAGIC),
    write_to_block(WithMagic, ?VERSION_USTAR, ?USTAR_VERSION);
set_format(_Block, Format) ->
    throw({error, {invalid_format, Format}}).
737
%% Computes the checksum of the block and writes it, in octal, into the
%% checksum field.
set_checksum(Block) ->
    write_octal(Block, ?V7_CHKSUM, ?V7_CHKSUM_LEN, compute_checksum(Block)).
741
%% Builds the PAX extended header entry that precedes the real header:
%% a header block of type ?TYPE_X_HEADER, the PAX records and zero
%% padding up to the next block boundary.
%% The entry is named <dir>/PaxHeaders.0/<file>, cut to fit the name
%% field when it is too long; the time stamps are copied from Header.
build_pax_entry(Header, PaxAttrs, Opts) ->
    MemberPath = Header#tar_header.name,
    Base = filename:basename(MemberPath),
    Parent = filename:dirname(MemberPath),
    AsciiPath = to_ascii(filename:join([Parent, "PaxHeaders.0", Base])),
    EntryName = case AsciiPath of
                    _ when byte_size(AsciiPath) > ?V7_NAME_LEN ->
                        binary_part(AsciiPath, 0, ?V7_NAME_LEN - 1);
                    _ ->
                        AsciiPath
                end,
    %% Records are written in sorted key order.
    PaxFile = build_pax_file(lists:sort(maps:keys(PaxAttrs)), PaxAttrs),
    Size = byte_size(PaxFile),
    Padding = (?BLOCK_SIZE - (Size rem ?BLOCK_SIZE)) rem ?BLOCK_SIZE,
    Pad = <<0:Padding/unit:8>>,
    PaxHeader = #tar_header{
                   name=unicode:characters_to_list(EntryName),
                   size=Size,
                   mtime=Header#tar_header.mtime,
                   atime=Header#tar_header.atime,
                   ctime=Header#tar_header.ctime,
                   typeflag=?TYPE_X_HEADER
                  },
    {ok, PaxHeaderData} = build_header(PaxHeader, Opts),
    [PaxHeaderData, PaxFile, Pad].
770
%% Serializes PAX attributes into the body of an extended header.
%%
%% One record of the form "<length> <key>=<value>\n" is produced per key,
%% in the order given by Keys. Keys and values may be binaries, strings,
%% integers or floats (see to_string/1). The result is a UTF-8 binary.
build_pax_file(Keys, PaxAttrs) ->
    build_pax_file(Keys, PaxAttrs, []).
build_pax_file([], _, Acc) ->
    unicode:characters_to_binary(Acc);
build_pax_file([K|Rest], Attrs, Acc) ->
    V = maps:get(K, Attrs),
    Record = build_pax_record(to_string(K), to_string(V)),
    build_pax_file(Rest, Attrs, [Acc, Record]).

%% Builds a single record. <length> is the size in bytes of the whole
%% record, including the length digits themselves, so it is derived from
%% the UTF-8 encoded payload rather than from the number of characters.
build_pax_record(Key, Value) ->
    Payload = unicode:characters_to_binary([$\s, Key, $=, Value, $\n]),
    PayloadSize = sizeof(Payload),
    Length = pax_record_length(PayloadSize, PayloadSize),
    <<(integer_to_binary(Length))/binary, Payload/binary>>.

%% Finds the length L satisfying L =:= PayloadSize + digits(L).
%% Adding the digits can itself add a digit (e.g. 9 -> 11, 98 -> 101),
%% so the guess is refined until it no longer changes.
pax_record_length(PayloadSize, Guess) ->
    case PayloadSize + sizeof(Guess) of
        Guess ->
            Guess;
        Better ->
            pax_record_length(PayloadSize, Better)
    end.
789
%% Returns the size of a term as used when sizing PAX records:
%% the number of digits/characters of a printed number, the number of
%% elements of a list, or the number of bytes of a binary.
sizeof(Int) when is_integer(Int) ->
    byte_size(integer_to_binary(Int));
sizeof(Float) when is_float(Float) ->
    byte_size(float_to_binary(Float));
sizeof(Chars) when is_list(Chars) ->
    length(Chars);
sizeof(Bytes) when is_binary(Bytes) ->
    byte_size(Bytes).
798
%% Converts a PAX key or value to a string (list).
%% Lists are returned untouched, binaries are decoded with
%% unicode:characters_to_list/1 and numbers are printed.
to_string(Chars) when is_list(Chars) ->
    Chars;
to_string(Bytes) when is_binary(Bytes) ->
    unicode:characters_to_list(Bytes);
to_string(Int) when is_integer(Int) ->
    integer_to_list(Int);
to_string(Float) when is_float(Float) ->
    float_to_list(Float).
807
%% Tries to split Path into a USTAR name and prefix.
%%
%% Returns false when no split is needed or possible: the path already
%% fits the V7 name field, it contains non-ASCII characters, it has no
%% path component at all, or its last component does not fit the name
%% field. Otherwise the result of join_split_ustar_path/2 is returned,
%% i.e. {ok, Name, Prefix} or false.
split_ustar_path(Path) ->
    Len = length(Path),
    NotAscii = not is_ascii(Path),
    if Len =< ?V7_NAME_LEN; NotAscii ->
            false;
       true ->
            PathBin = binary:list_to_bin(Path),
            case binary:split(PathBin, [<<$/>>], [global, trim_all]) of
                [] ->
                    %% The path consisted solely of separators, so there
                    %% is no name to store (lists:last/1 would crash).
                    false;
                Parts ->
                    case lists:last(Parts) of
                        Name when byte_size(Name) >= ?V7_NAME_LEN ->
                            false;
                        Name ->
                            Dirs = lists:sublist(Parts, length(Parts) - 1),
                            join_split_ustar_path(Dirs, {ok, Name, nil})
                    end
            end
    end.
828
%% Joins the directory components of a path into a USTAR prefix.
%%
%% The second argument is {ok, Name, Prefix}, where Prefix is nil until
%% the first component has been consumed. Returns the completed tuple,
%% or false as soon as the prefix would exceed ?USTAR_PREFIX_LEN bytes.
join_split_ustar_path([], Acc) ->
    Acc;
join_split_ustar_path([Part|_], {ok, _, nil})
  when byte_size(Part) > ?USTAR_PREFIX_LEN ->
    false;
join_split_ustar_path([Part|_], {ok, _Name, Acc})
  %% +1 accounts for the $/ separator inserted between Acc and Part
  when (byte_size(Acc) + 1 + byte_size(Part)) > ?USTAR_PREFIX_LEN ->
    false;
join_split_ustar_path([Part|Rest], {ok, Name, nil}) ->
    join_split_ustar_path(Rest, {ok, Name, Part});
join_split_ustar_path([Part|Rest], {ok, Name, Acc}) ->
    join_split_ustar_path(Rest, {ok, Name, <<Acc/binary,$/,Part/binary>>}).
841
%% Writes X as a zero padded octal number into the field of Block that
%% starts at Pos and is Size bytes long. The number may use at most
%% Size-1 bytes; otherwise {error, {write_failed, octal_field_too_long}}
%% is thrown.
write_octal(Block, Pos, Size, X) ->
    case zero_pad(format_octal(X), Size-1) of
        Octal when byte_size(Octal) < Size ->
            write_to_block(Block, Octal, Pos);
        _TooLong ->
            throw({error, {write_failed, octal_field_too_long}})
    end.
849
%% Stores Str either in the header block or in the PAX attribute map.
%%
%% When a PAX attribute is given (PaxAttr is not ?PAX_NONE) and the string
%% is longer than Size or contains non-ASCII characters, the block is
%% left untouched and Str is recorded under PaxAttr. In every other case
%% the formatted string is written at Pos. Returns {Block, Pax}.
write_string(Block, Pos, Size, Str, PaxAttr, Pax0) ->
    NonAscii = not is_ascii(Str),
    if PaxAttr =/= ?PAX_NONE andalso (length(Str) > Size orelse NonAscii) ->
            {Block, Pax0#{PaxAttr => Str}};
       true ->
            {write_to_block(Block, format_string(Str, Size), Pos), Pax0}
    end.
%% Stores the number X either in the header block or in the PAX map.
%%
%% The octal form is preferred: if it fits in Size-1 bytes it is written
%% at Pos followed by a NUL byte. If it does not fit, X is recorded under
%% PaxAttr instead, and when no PAX attribute is available
%% {error, {write_failed, numeric_field_too_long}} is thrown.
%% Returns {Block, Pax}.
write_numeric(Block, Pos, Size, X, PaxAttr, Pax0) ->
    case zero_pad(format_octal(X), Size-1) of
        Octal when byte_size(Octal) < Size ->
            {write_to_block(Block, [Octal, 0], Pos), Pax0};
        _TooLong when PaxAttr =/= ?PAX_NONE ->
            {Block, Pax0#{PaxAttr => X}};
        _TooLong ->
            throw({error, {write_failed, numeric_field_too_long}})
    end.
870
%% Left pads the binary Str with $0 characters up to Size bytes.
%% A binary that is already Size bytes or longer is returned unchanged.
zero_pad(Str, Size) ->
    case Size - byte_size(Str) of
        Missing when Missing > 0 ->
            Zeros = binary:copy(<<$0>>, Missing),
            <<Zeros/binary, Str/binary>>;
        _ ->
            Str
    end.
877
878
%%%================================================================
%% Functions for reading and parsing tar archive headers
881
%% Reads the next ?BLOCK_SIZE bytes from Reader.
%%
%% Returns {ok, Block, Reader1} for a regular block and eof once the
%% end-of-archive marker has been reached. Throws {error, eof} when the
%% input ends before a block could be read, and rethrows read errors.
read_block(Reader) ->
    case do_read(Reader, ?BLOCK_SIZE) of
        {ok, ?ZERO_BLOCK, AfterZero} ->
            %% Two zero blocks mark the end of the archive
            read_second_zero_block(AfterZero);
        {ok, Block, Reader1} when is_binary(Block) ->
            {ok, Block, Reader1};
        eof ->
            throw({error, eof});
        {error, _} = Err ->
            throw(Err)
    end.

%% Checks what follows a first zero block.
read_second_zero_block(Reader) ->
    case do_read(Reader, ?BLOCK_SIZE) of
        {ok, ?ZERO_BLOCK, _Reader1} ->
            eof;
        {ok, _Block, _Reader1} ->
            throw({error, invalid_end_of_archive});
        eof ->
            %% Technically a malformed end-of-archive marker, since two
            %% zero blocks are expected, but having come this far it is
            %% simply accepted.
            eof;
        {error,_} = Err ->
            throw(Err)
    end.
906
%% Reads the next block and converts it into a header.
%% Returns eof at the end of the archive, otherwise whatever
%% convert_header/2 returns for the block.
get_header(#reader{}=Reader) ->
    case read_block(Reader) of
        {ok, Block, Reader1} ->
            convert_header(Block, Reader1);
        eof ->
            eof
    end.
914
%% Cuts a raw header block into its V7 fields.
%% Every field is kept as the raw binary slice (the type flag as a single
%% byte); nothing is decoded here. Returns {error, header_block_too_small}
%% for anything that is not a binary of exactly ?BLOCK_SIZE bytes.
to_v7(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Field = fun(Pos, Len) -> binary_part(Bin, Pos, Len) end,
    #header_v7{
       name=Field(?V7_NAME, ?V7_NAME_LEN),
       mode=Field(?V7_MODE, ?V7_MODE_LEN),
       uid=Field(?V7_UID, ?V7_UID_LEN),
       gid=Field(?V7_GID, ?V7_GID_LEN),
       size=Field(?V7_SIZE, ?V7_SIZE_LEN),
       mtime=Field(?V7_MTIME, ?V7_MTIME_LEN),
       checksum=Field(?V7_CHKSUM, ?V7_CHKSUM_LEN),
       typeflag=binary:at(Bin, ?V7_TYPE),
       linkname=Field(?V7_LINKNAME, ?V7_LINKNAME_LEN)
      };
to_v7(_) ->
    {error, header_block_too_small}.
930
%% Cuts the GNU specific fields out of a raw header block and returns
%% them, together with the already extracted V7 fields, as a #header_gnu{}.
%% All fields except the sparse array are kept as raw binary slices.
to_gnu(#header_v7{}=V7, Bin)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Field = fun(Pos, Len) -> binary_part(Bin, Pos, Len) end,
    #header_gnu{
       header_v7=V7,
       magic=Field(?GNU_MAGIC, ?GNU_MAGIC_LEN),
       version=Field(?GNU_VERSION, ?GNU_VERSION_LEN),
       uname=Field(265, 32),
       gname=Field(297, 32),
       devmajor=Field(329, 8),
       devminor=Field(337, 8),
       atime=Field(345, 12),
       ctime=Field(357, 12),
       %% four 24-byte sparse entries plus the one-byte "extended" flag
       sparse=to_sparse_array(Field(386, 24*4+1)),
       real_size=Field(483, 12)
      }.
946
%% Cuts the STAR specific fields out of a raw header block and returns
%% them, together with the already extracted V7 fields, as a
%% #header_star{}. All fields are kept as raw binary slices.
to_star(#header_v7{}=V7, Bin)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Field = fun(Pos, Len) -> binary_part(Bin, Pos, Len) end,
    #header_star{
       header_v7=V7,
       magic=Field(?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
       version=Field(?USTAR_VERSION, ?USTAR_VERSION_LEN),
       uname=Field(?USTAR_UNAME, ?USTAR_UNAME_LEN),
       gname=Field(?USTAR_GNAME, ?USTAR_GNAME_LEN),
       devmajor=Field(?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
       devminor=Field(?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
       prefix=Field(345, 131),
       atime=Field(476, 12),
       ctime=Field(488, 12),
       trailer=Field(?STAR_TRAILER, ?STAR_TRAILER_LEN)
      }.
962
%% Cuts the USTAR specific fields out of a raw header block and returns
%% them, together with the already extracted V7 fields, as a
%% #header_ustar{}. All fields are kept as raw binary slices.
to_ustar(#header_v7{}=V7, Bin)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Field = fun(Pos, Len) -> binary_part(Bin, Pos, Len) end,
    #header_ustar{
       header_v7=V7,
       magic=Field(?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
       version=Field(?USTAR_VERSION, ?USTAR_VERSION_LEN),
       uname=Field(?USTAR_UNAME, ?USTAR_UNAME_LEN),
       gname=Field(?USTAR_GNAME, ?USTAR_GNAME_LEN),
       devmajor=Field(?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
       devminor=Field(?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
       prefix=Field(345, 155)
      }.
975
%% Decodes a binary holding a run of 24-byte sparse entries followed by
%% a flag byte. The flag byte sits right after the last whole entry; the
%% value 1 means that the sparse map continues in a further block.
to_sparse_array(Bin) when is_binary(Bin) ->
    Capacity = byte_size(Bin) div 24,
    Extended = (binary:at(Bin, 24*Capacity) =:= 1),
    Parsed = parse_sparse_entries(Bin, Capacity-1, []),
    #sparse_array{
       entries=Parsed,
       max_entries=Capacity,
       is_extended=Extended
      }.
985
%% Collects the sparse entries stored in slots N, N-1, ..., 0 of Bin.
%% Walking backwards while prepending keeps the entries in slot order.
%% Unused (all zero) slots are left out.
parse_sparse_entries(<<>>, _, Acc) ->
    Acc;
parse_sparse_entries(_, -1, Acc) ->
    Acc;
parse_sparse_entries(Bin, N, Acc) ->
    Slot = binary_part(Bin, N*24, 24),
    Acc1 = case to_sparse_entry(Slot) of
               #sparse_entry{} = Entry ->
                   [Entry|Acc];
               nil ->
                   Acc
           end,
    parse_sparse_entries(Bin, N-1, Acc1).
997
-define(EMPTY_ENTRY, <<0,0,0,0,0,0,0,0,0,0,0,0>>).
%% Decodes one 24-byte sparse map slot: a 12-byte offset followed by a
%% 12-byte length. Returns nil for a slot in which both halves consist
%% of zero bytes only.
to_sparse_entry(<<OffsetBin:12/binary, NumBytesBin:12/binary>>) ->
    if OffsetBin =:= ?EMPTY_ENTRY, NumBytesBin =:= ?EMPTY_ENTRY ->
            nil;
       true ->
            #sparse_entry{
               offset=parse_numeric(OffsetBin),
               num_bytes=parse_numeric(NumBytesBin)}
    end.
1010
%% Determines the format of a raw header block.
%% The block is first cut into its V7 fields; the actual detection is
%% done by do_get_format/2.
-spec get_format(binary()) -> {ok, pos_integer(), header_v7()}
                                  | ?FORMAT_UNKNOWN
                                  | {error, term()}.
get_format(Block) when is_binary(Block), byte_size(Block) =:= ?BLOCK_SIZE ->
    V7 = to_v7(Block),
    do_get_format(V7, Block).
1016
%% Verifies the header checksum and guesses the header format.
%%
%% An error from to_v7/1 is passed through. If the stored checksum
%% matches neither the unsigned nor the signed checksum of the block,
%% ?FORMAT_UNKNOWN is returned; otherwise {ok, Format, V7}.
do_get_format(#header_v7{checksum=ChecksumField}=V7, Bin)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Stored = parse_octal(ChecksumField),
    Unsigned = compute_checksum(Bin),
    Signed = compute_signed_checksum(Bin),
    case Stored =:= Unsigned orelse Stored =:= Signed of
        false ->
            ?FORMAT_UNKNOWN;
        true ->
            {ok, detect_header_format(V7, Bin), V7}
    end;
do_get_format({error, _} = Err, _Bin) ->
    Err.

%% Guesses the format from the magic, version and trailer fields.
%% Anything that is not recognized is treated as plain V7.
detect_header_format(V7, Bin) ->
    Ustar = to_ustar(V7, Bin),
    Star = to_star(V7, Bin),
    Magic = Ustar#header_ustar.magic,
    Version = Ustar#header_ustar.version,
    Trailer = Star#header_star.trailer,
    if
        Magic =:= ?MAGIC_USTAR, Trailer =:= ?TRAILER_STAR ->
            ?FORMAT_STAR;
        Magic =:= ?MAGIC_USTAR ->
            ?FORMAT_USTAR;
        Magic =:= ?MAGIC_GNU, Version =:= ?VERSION_GNU ->
            ?FORMAT_GNU;
        true ->
            ?FORMAT_V7
    end.
1045
%% Decodes a header block of the given format into a #tar_header{} and
%% creates the reader used to access the data of the entry.
%%
%% The V7 fields are decoded first (atime and ctime start out equal to
%% mtime); formats newer than V7 are completed by unpack_modern/4. Link
%% and directory entries get size 0. GNU sparse entries yield a sparse
%% file reader, all other entries a regular file reader.
%% Returns {Header, FileReader}.
unpack_format(Format, #header_v7{}=V7, Bin, Reader)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Mtime = parse_numeric(V7#header_v7.mtime),
    Header0 = #tar_header{
                 name=parse_string(V7#header_v7.name),
                 mode=parse_numeric(V7#header_v7.mode),
                 uid=parse_numeric(V7#header_v7.uid),
                 gid=parse_numeric(V7#header_v7.gid),
                 size=parse_numeric(V7#header_v7.size),
                 mtime=Mtime,
                 atime=Mtime,
                 ctime=Mtime,
                 typeflag=V7#header_v7.typeflag,
                 linkname=parse_string(V7#header_v7.linkname)
                },
    Typeflag = Header0#tar_header.typeflag,
    Header1 = case Format > ?FORMAT_V7 of
                  true ->
                      unpack_modern(Format, V7, Bin, Header0);
                  false ->
                      V7Name = Header0#tar_header.name,
                      Header0#tar_header{name=safe_join_path("", V7Name)}
              end,
    Header2 = case is_header_only_type(Typeflag) of
                  true ->
                      Header1#tar_header{size=0};
                  false ->
                      Header1
              end,
    case Typeflag =:= ?TYPE_GNU_SPARSE of
        true ->
            Gnu = to_gnu(V7, Bin),
            RealSize = parse_numeric(Gnu#header_gnu.real_size),
            {Sparsemap, SparseReader} = parse_sparse_map(Gnu, Reader),
            {Header2#tar_header{size=RealSize},
             new_sparse_file_reader(SparseReader, Sparsemap, RealSize)};
        false ->
            Size = Header2#tar_header.size,
            {Header2, #reg_file_reader{
                         handle=Reader,
                         num_bytes=Size,
                         size=Size,
                         pos = 0
                        }}
    end.
1089
%% Completes a header with the fields that only exist in formats newer
%% than V7: user and group name, device numbers (character and block
%% devices only) and the name prefix. For the STAR format the access and
%% change times are taken from the header as well.
unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0)
  when is_binary(Bin) ->
    Typeflag = Header0#tar_header.typeflag,
    Ustar = to_ustar(V7, Bin),
    H0 = Header0#tar_header{
            uname=parse_string(Ustar#header_ustar.uname),
            gname=parse_string(Ustar#header_ustar.gname)},
    H1 = case Typeflag =:= ?TYPE_CHAR orelse Typeflag =:= ?TYPE_BLOCK of
             true ->
                 Major = parse_numeric(Ustar#header_ustar.devmajor),
                 Minor = parse_numeric(Ustar#header_ustar.devminor),
                 H0#tar_header{devmajor=Major, devminor=Minor};
             false ->
                 H0
         end,
    {Prefix, H2} = unpack_prefix(Format, V7, Bin, Ustar, H1),
    H2#tar_header{name=safe_join_path(Prefix, H2#tar_header.name)}.

%% Returns {Prefix, Header} for the given format. Formats other than
%% USTAR and STAR have no prefix.
unpack_prefix(?FORMAT_USTAR, _V7, _Bin, Ustar, Header) ->
    {parse_string(Ustar#header_ustar.prefix), Header};
unpack_prefix(?FORMAT_STAR, V7, Bin, _Ustar, Header) ->
    Star = to_star(V7, Bin),
    Prefix = parse_string(Star#header_star.prefix),
    Atime = parse_numeric(Star#header_star.atime),
    Ctime = parse_numeric(Star#header_star.ctime),
    {Prefix, Header#tar_header{atime=Atime, ctime=Ctime}};
unpack_prefix(_Format, _V7, _Bin, _Ustar, Header) ->
    {"", Header}.
1127
1128
%% Joins a prefix and a name into one path. An empty part is left out,
%% so the other part is only passed through filename:join/1.
safe_join_path(Prefix, Name) ->
    case {Prefix, Name} of
        {[], _} ->
            filename:join([Name]);
        {_, []} ->
            filename:join([Prefix]);
        {_, _} ->
            filename:join(Prefix, Name)
    end.
1135
%% Creates a reader for a sparse file positioned at its start.
%% The sparse map is validated first; an invalid map makes
%% validate_sparse_entries/2 throw.
new_sparse_file_reader(Reader, Sparsemap, RealSize) ->
    true = validate_sparse_entries(Sparsemap, RealSize),
    #sparse_file_reader{
       handle = Reader,
       sparse_map = Sparsemap,
       size = RealSize,
       num_bytes = RealSize,
       pos = 0}.
1144
%% Validates a sparse map against the real size of the file.
%% Returns true if every entry is acceptable and throws
%% {error, {invalid_sparse_map_entry, Why}} for the first entry that is not.
validate_sparse_entries(Entries, RealSize) ->
    validate_sparse_entries(Entries, RealSize, 0, 0).
validate_sparse_entries([], _RealSize, _I, _LastOffset) ->
    true;
validate_sparse_entries([#sparse_entry{offset=Offset,num_bytes=NumBytes}|Rest],
                        RealSize, I, LastOffset) ->
    ok = check_sparse_entry(Offset, NumBytes, RealSize, I, LastOffset),
    validate_sparse_entries(Rest, RealSize, I+1, Offset+NumBytes).

%% Checks one entry. LastOffset is the end (offset + length) of the
%% entry that precedes it, I its zero-based position in the map.
check_sparse_entry(Offset, NumBytes, _RealSize, _I, _LastOffset)
  when Offset > ?MAX_INT64-NumBytes ->
    %% the end of the entry would not fit in 64 bits
    throw({error, {invalid_sparse_map_entry, offset_too_large}});
check_sparse_entry(Offset, NumBytes, RealSize, _I, _LastOffset)
  when Offset+NumBytes > RealSize ->
    throw({error, {invalid_sparse_map_entry, offset_too_large}});
check_sparse_entry(Offset, _NumBytes, _RealSize, I, LastOffset)
  when I > 0 andalso LastOffset > Offset ->
    throw({error, {invalid_sparse_map_entry, overlapping_offsets}});
check_sparse_entry(_Offset, _NumBytes, _RealSize, _I, _LastOffset) ->
    ok.
1163
1164
%% Returns the complete sparse map of a GNU sparse entry together with
%% the reader positioned after the map.
%%
%% If the map in the header is flagged as extended, further blocks are
%% read for as long as they are flagged as extended too, and all entries
%% are returned sorted by offset. Otherwise the entries of the header are
%% returned as they are.
-spec parse_sparse_map(header_gnu(), descriptor_type()) ->
                              {[sparse_entry()], descriptor_type()}.
parse_sparse_map(#header_gnu{sparse=#sparse_array{is_extended=true}=Sparse},
                 Reader) ->
    parse_sparse_map(Sparse, Reader, []);
parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) ->
    {Sparse#sparse_array.entries, Reader}.
parse_sparse_map(#sparse_array{is_extended=true,entries=Entries}, Reader, Acc) ->
    case read_block(Reader) of
        {ok, Block, Reader2} ->
            parse_sparse_map(to_sparse_array(Block), Reader2, Entries++Acc);
        eof ->
            %% the map promised another block that is not there
            throw({error, eof})
    end;
parse_sparse_map(#sparse_array{entries=Entries}, Reader, Acc) ->
    ByOffset = fun (#sparse_entry{offset=A},#sparse_entry{offset=B}) ->
                       A =< B
               end,
    {lists:sort(ByOffset, Entries++Acc), Reader}.
1185
%% Defined as the sum of the unsigned byte values of the entire header
%% record, with the bytes of the checksum field treated as ASCII spaces.
compute_checksum(<<Before:?V7_CHKSUM/binary,
                   ChkField:?V7_CHKSUM_LEN/binary,
                   After:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
                   _/binary>>) ->
    Spaces = byte_size(ChkField) * $\s,
    checksum(Before) + Spaces + checksum(After).
1195
%% Same as compute_checksum/1, except that the header bytes are summed
%% as signed values. The bytes of the checksum field still count as
%% ASCII spaces.
compute_signed_checksum(<<Before:?V7_CHKSUM/binary,
                          ChkField:?V7_CHKSUM_LEN/binary,
                          After:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
                          _/binary>>) ->
    Spaces = byte_size(ChkField) * $\s,
    signed_checksum(Before) + Spaces + signed_checksum(After).
1203
%% Returns the sum of all bytes of a binary, each byte taken as an
%% unsigned value (0..255). The empty binary sums to 0.
checksum(Bin) ->
    checksum(Bin, 0).

checksum(<<>>, Total) ->
    Total;
checksum(<<Byte/unsigned,Tail/binary>>, Total) ->
    checksum(Tail, Total+Byte).
1209
%% Returns the sum of all bytes of a binary, each byte taken as a
%% signed value (-128..127). The empty binary sums to 0.
signed_checksum(Bin) ->
    signed_checksum(Bin, 0).

signed_checksum(<<>>, Total) ->
    Total;
signed_checksum(<<Byte/signed,Tail/binary>>, Total) ->
    signed_checksum(Tail, Total+Byte).
1214
%% Decodes a numeric header field.
%%
%% Two encodings are supported: the classic octal text form and the
%% binary base-256 form, which is recognized by the most significant bit
%% of the first byte being set. Base-256 values are signed, so the result
%% may be negative. An empty field decodes to 0.
%% Throws {error, integer_overflow} for base-256 values that do not fit in
%% a signed 64-bit integer.
-spec parse_numeric(binary()) -> integer().
parse_numeric(<<>>) ->
    0;
parse_numeric(<<First, _/binary>> = Bin) ->
    %% check for base-256 format first
    %% if the bit is set, then all following bits constitute a two's
    %% complement encoded number in big-endian byte order
    if
        First band 16#80 =/= 0 ->
            %% the bit below the format marker is the sign bit
            parse_base256(Bin, First band 16#40 =/= 0);
        true ->
            %% normal case is an octal number
            parse_octal(Bin)
    end.

%% Decodes a base-256 field.
%% Handling negative numbers relies on the following identity:
%%     -a-1 == ^a
%% If the number is negative, an inversion mask is used to invert the
%% data bytes, the value is accumulated as an unsigned number and the
%% result is inverted again at the end. Positive numbers use no mask.
parse_base256(Bin, IsNegative) ->
    Inv = if IsNegative -> 16#FF; true -> 16#00 end,
    Reducer = fun (C, {I, X}) ->
                      C1 = C bxor Inv,
                      %% the first byte carries the format marker bit,
                      %% which is not part of the value
                      C2 = if I =:= 0 -> C1 band 16#7F; true -> C1 end,
                      if (X bsr 56) > 0 ->
                              %% another shift would exceed 64 bits
                              throw({error,integer_overflow});
                         true ->
                              {I+1, (X bsl 8) bor C2}
                      end
              end,
    {_, N} = lists:foldl(Reducer, {0,0}, binary:bin_to_list(Bin)),
    if (N bsr 63) > 0 ->
            throw({error, integer_overflow});
       IsNegative ->
            -1 bxor N;
       true ->
            N
    end.

%% Decodes an octal text field. All NUL bytes and spaces are dropped
%% before the remaining characters are parsed; a field without any other
%% character decodes to 0. Throws {error, invalid_tar_checksum} if the
%% remaining characters are not a valid octal number.
parse_octal(Bin) when is_binary(Bin) ->
    do_parse_octal(Bin, <<>>).
do_parse_octal(<<>>, <<>>) ->
    0;
do_parse_octal(<<>>, Acc) ->
    case io_lib:fread("~8u", binary:bin_to_list(Acc)) of
        {error, _} -> throw({error, invalid_tar_checksum});
        {ok, [Octal], []} -> Octal;
        {ok, _, _} -> throw({error, invalid_tar_checksum})
    end;
do_parse_octal(<<$\s,Rest/binary>>, Acc) ->
    do_parse_octal(Rest, Acc);
do_parse_octal(<<0, Rest/binary>>, Acc) ->
    do_parse_octal(Rest, Acc);
do_parse_octal(<<C, Rest/binary>>, Acc) ->
    do_parse_octal(Rest, <<Acc/binary, C>>).
1271
%% Decodes a string header field.
%%
%% The field ends at the first NUL byte or at the end of the binary.
%% The bytes are decoded as UTF-8; if they end in the middle of a
%% character the raw bytes are returned as a list instead, and invalid
%% data makes the function throw {error, {bad_header, invalid_string}}.
parse_string(Bin) when is_binary(Bin) ->
    do_parse_string(Bin, <<>>).

do_parse_string(<<C, Rest/binary>>, Acc) when C =/= 0 ->
    do_parse_string(Rest, <<Acc/binary, C>>);
do_parse_string(<<0, _/binary>>, Acc) ->
    decode_header_string(Acc);
do_parse_string(<<>>, Acc) ->
    decode_header_string(Acc).

%% Converts the collected bytes of a string field into a list.
decode_header_string(Bytes) ->
    case unicode:characters_to_list(Bytes) of
        {error, _Str, _Rest} ->
            throw({error, {bad_header, invalid_string}});
        {incomplete, _Str, _Rest} ->
            binary:bin_to_list(Bytes);
        Str when is_list(Str) ->
            Str
    end.
1287
%% Converts a raw block read at the current reader position into a
%% header and a file reader (see unpack_format/4).
%%
%% Throws {error, bad_header} or {error, {bad_header, Reason}} if the
%% block is not a recognizable header, {error, {misaligned_read, Pos}} if
%% the reader is not positioned on a block boundary, and {error, eof} for
%% a block that is too short. An empty block yields eof.
convert_header(Bin, #reader{pos=Pos}=Reader)
  when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 ->
    case get_format(Bin) of
        ?FORMAT_UNKNOWN ->
            throw({error, bad_header});
        {ok, Format, V7} ->
            unpack_format(Format, V7, Bin, Reader);
        {error, Reason} ->
            throw({error, {bad_header, Reason}})
    end;
convert_header(Bin, #reader{pos=Pos}) when byte_size(Bin) =:= ?BLOCK_SIZE ->
    %% Thrown as a two-tuple like every other error of this module, so
    %% that callers matching on {error, _} can handle it.
    throw({error, {misaligned_read, Pos}});
convert_header(Bin, _Reader) when byte_size(Bin) =:= 0 ->
    eof;
convert_header(_Bin, _Reader) ->
    throw({error, eof}).
1304
%% Creates a partially populated header record from a file_info record.
%% Times, mode, uid and gid are copied as they are; the remaining fields
%% depend on the file type and are filled in by do_fileinfo_to_header/3.
%% Link is used as the link target if the file is a symlink, and a slash
%% is appended to the name if the file is a directory.
fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) ->
    #file_info{mtime=Mtime, atime=Atime, ctime=Ctime,
               mode=Mode, uid=Uid, gid=Gid} = Fi,
    Header = #tar_header{name=Name,
                         mtime=Mtime,
                         atime=Atime,
                         ctime=Ctime,
                         mode=Mode,
                         uid=Uid,
                         gid=Gid,
                         typeflag=?TYPE_REGULAR},
    do_fileinfo_to_header(Header, Fi, Link).
1319
%% Completes a header according to the type of the file.
%% Regular files get their size, directories a trailing slash, symlinks
%% their target and devices their major/minor numbers. Any other kind of
%% file yields {error, {invalid_file_type, Name, FileInfo}}.
do_fileinfo_to_header(Header, #file_info{type=regular,size=Size}, _Link) ->
    Header#tar_header{typeflag=?TYPE_REGULAR,size=Size};
do_fileinfo_to_header(#tar_header{name=Name}=Header,
                      #file_info{type=directory}, _Link) ->
    Header#tar_header{typeflag=?TYPE_DIR,name=Name++"/"};
do_fileinfo_to_header(Header, #file_info{type=symlink}, Link) ->
    Header#tar_header{linkname=Link,typeflag=?TYPE_SYMLINK};
do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link)
  when (Mode band ?S_IFMT) =:= ?S_IFCHR ->
    device_header(Header, Fi, ?TYPE_CHAR);
do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link)
  when (Mode band ?S_IFMT) =:= ?S_IFBLK ->
    device_header(Header, Fi, ?TYPE_BLOCK);
do_fileinfo_to_header(Header, #file_info{type=other,mode=Mode}, _Link)
  when (Mode band ?S_IFMT) =:= ?S_FIFO ->
    Header#tar_header{typeflag=?TYPE_FIFO};
do_fileinfo_to_header(Header, Fi, _Link) ->
    {error, {invalid_file_type, Header#tar_header.name, Fi}}.

%% Marks the header as a device of the given type and copies the device
%% numbers from the file info.
device_header(Header, #file_info{major_device=Major,minor_device=Minor},
              Typeflag) ->
    Header#tar_header{typeflag=Typeflag,
                      devmajor=Major,
                      devminor=Minor}.
1342
%% Returns true if the string or binary contains no element/byte with a
%% value of 16#80 or above. Empty input counts as ASCII.
is_ascii(Bin) when is_binary(Bin) ->
    is_ascii1(Bin);
is_ascii(Str) when is_list(Str) ->
    IsHigh = fun (Char) -> Char >= 16#80 end,
    not lists:any(IsHigh, Str).

is_ascii1(<<C, Rest/binary>>) when C < 16#80 ->
    is_ascii1(Rest);
is_ascii1(<<_, _Rest/binary>>) ->
    false;
is_ascii1(<<>>) ->
    true.
1354
%% Reduces a string or binary to its ASCII content by dropping every
%% element/byte with a value of 16#80 or above. Always returns a binary.
to_ascii(Str) when is_list(Str) ->
    Kept = case is_ascii(Str) of
               true ->
                   Str;
               false ->
                   [Char || Char <- Str, Char < 16#80]
           end,
    unicode:characters_to_binary(Kept);
to_ascii(Bin) when is_binary(Bin) ->
    to_ascii(Bin, <<>>).

to_ascii(<<C, Rest/binary>>, Acc) when C < 16#80 ->
    to_ascii(Rest, <<Acc/binary,C>>);
to_ascii(<<_, Rest/binary>>, Acc) ->
    %% not ASCII, skip it
    to_ascii(Rest, Acc);
to_ascii(<<>>, Acc) ->
    Acc.
1371
%% Returns true for the entry types that are handled as having no data
%% of their own: symbolic links, hard links and directories.
is_header_only_type(Typeflag) ->
    Typeflag =:= ?TYPE_SYMLINK
        orelse Typeflag =:= ?TYPE_LINK
        orelse Typeflag =:= ?TYPE_DIR.
1376
%% Folds Fun over all entries of an archive.
%%
%% The first argument is either a reader that is already open, which must
%% have been opened for reading, or something open/2 accepts. In the
%% latter case the archive is opened with the open mode of Opts and is
%% always closed again, whether the fold succeeds or not.
%% Returns the result of the fold or {error, Reason}.
foldl_read(#reader{access=read}=Reader, Fun, Accu, #read_opts{}=Opts)
  when is_function(Fun,4) ->
    case foldl_read0(Reader, Fun, Accu, Opts) of
        {error, _} = Failure ->
            Failure;
        {ok, Result, _Reader2} ->
            Result
    end;
foldl_read(#reader{access=Access}, _Fun, _Accu, _Opts) ->
    {error, {read_mode_expected, Access}};
foldl_read(TarName, Fun, Accu, #read_opts{}=Opts)
  when is_function(Fun,4) ->
    OpenMode = [read|Opts#read_opts.open_mode],
    try open(TarName, OpenMode) of
        {error, _} = Failure ->
            Failure;
        {ok, #reader{access=read}=Reader} ->
            try
                foldl_read(Reader, Fun, Accu, Opts)
            after
                _ = close(Reader)
            end
    catch
        throw:Thrown ->
            Thrown
    end.
1402
%% Runs the fold and turns thrown errors into return values.
%% An error thrown as {error, {Reason, Format, Args}} is reported through
%% read_verbose/3 and returned as {error, Reason}; everything else that
%% is thrown is returned unchanged.
foldl_read0(Reader, Fun, Accu, Opts) ->
    try foldl_read1(Fun, Accu, Reader, Opts, #{}) of
        {ok,_,_} = Done ->
            Done
    catch
        throw:{error, {Reason, Format, Args}} ->
            read_verbose(Opts, Format, Args),
            {error, Reason};
        throw:Thrown ->
            Thrown
    end.
1414
%% Reads headers until the end of the archive, calling Fun for every
%% real entry and finally with eof.
%%
%% PAX extended headers and GNU long name/long link entries are not
%% passed to Fun. Their contents are collected in ExtraHeaders and merged
%% into the header of the entry that follows, after which the collection
%% starts over with an empty map.
foldl_read1(Fun, Accu0, Reader0, Opts, ExtraHeaders) ->
    {ok, Reader1} = skip_unread(Reader0),
    case get_header(Reader1) of
        eof ->
            Fun(eof, Reader1, Opts, Accu0);
        {Header, Reader2} ->
            case Header#tar_header.typeflag of
                ?TYPE_X_HEADER ->
                    {ExtraHeaders2, Reader3} = parse_pax(Reader2),
                    ExtraHeaders3 = maps:merge(ExtraHeaders, ExtraHeaders2),
                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders3);
                ?TYPE_GNU_LONGNAME ->
                    {ExtraHeaders2, Reader3} =
                        read_real_name(?PAX_PATH, Reader2, ExtraHeaders),
                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2);
                ?TYPE_GNU_LONGLINK ->
                    {ExtraHeaders2, Reader3} =
                        read_real_name(?PAX_LINKPATH, Reader2, ExtraHeaders),
                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2);
                _ ->
                    Header1 = merge_pax(Header, ExtraHeaders),
                    {ok, NewAccu, Reader3} = Fun(Header1, Reader2, Opts, Accu0),
                    foldl_read1(Fun, NewAccu, Reader3, Opts, #{})
            end
    end.

%% Reads the name stored as the data of a GNU long name/long link entry
%% and records it under PaxKey. Returns {ExtraHeaders, Reader}.
read_real_name(PaxKey, FileReader, ExtraHeaders) ->
    {RealName, Reader} = get_real_name(FileReader),
    %% get_real_name/1 returns the list "" for an entry without data,
    %% while parse_string/1 only accepts binaries.
    Name = parse_string(iolist_to_binary(RealName)),
    {maps:put(PaxKey, Name, ExtraHeaders), Reader}.
1442
%% Applies all known PAX attributes to the current tar header.
%% Attributes that are not known are ignored.
-spec merge_pax(tar_header(), #{binary() => binary()}) -> tar_header().
merge_pax(Header, ExtraHeaders) when is_map(ExtraHeaders) ->
    do_merge_pax(Header, maps:to_list(ExtraHeaders)).

do_merge_pax(Header, []) ->
    Header;
do_merge_pax(Header, [{Key, Value}|Rest]) ->
    do_merge_pax(merge_pax_attr(Key, Value, Header), Rest);
do_merge_pax(Header, [_Ignore|Rest]) ->
    do_merge_pax(Header, Rest).

%% Applies a single attribute. Names are converted to strings, ids and
%% the size to integers and times with parse_pax_time/1.
merge_pax_attr(?PAX_PATH, Path, Header) ->
    Header#tar_header{name=unicode:characters_to_list(Path)};
merge_pax_attr(?PAX_LINKPATH, LinkPath, Header) ->
    Header#tar_header{linkname=unicode:characters_to_list(LinkPath)};
merge_pax_attr(?PAX_GNAME, Gname, Header) ->
    Header#tar_header{gname=unicode:characters_to_list(Gname)};
merge_pax_attr(?PAX_UNAME, Uname, Header) ->
    Header#tar_header{uname=unicode:characters_to_list(Uname)};
merge_pax_attr(?PAX_UID, Uid, Header) ->
    Header#tar_header{uid=binary_to_integer(Uid)};
merge_pax_attr(?PAX_GID, Gid, Header) ->
    Header#tar_header{gid=binary_to_integer(Gid)};
merge_pax_attr(?PAX_ATIME, Atime, Header) ->
    Header#tar_header{atime=parse_pax_time(Atime)};
merge_pax_attr(?PAX_MTIME, Mtime, Header) ->
    Header#tar_header{mtime=parse_pax_time(Mtime)};
merge_pax_attr(?PAX_CTIME, Ctime, Header) ->
    Header#tar_header{ctime=parse_pax_time(Ctime)};
merge_pax_attr(?PAX_SIZE, Size, Header) ->
    Header#tar_header{size=binary_to_integer(Size)};
merge_pax_attr(<<?PAX_XATTR_STR, _Key/binary>>, _Value, Header) ->
    %% extended attributes are not stored in the header
    Header;
merge_pax_attr(_Unknown, _Value, Header) ->
    Header.
1480
%% Parses a PAX time value of the form "<seconds>[.<fraction>]" and
%% returns the number of whole seconds since the UNIX epoch.
%%
%% The fraction is brought to exactly ?MAX_NANO_INT_SIZE digits (padded
%% with zeros or truncated) so that it can be read as nanoseconds. It is
%% parsed, and therefore has to be numeric, but it does not survive the
%% final truncation to seconds.
-spec parse_pax_time(binary()) -> tar_time().
parse_pax_time(Bin) when is_binary(Bin) ->
    TotalNano = case binary:split(Bin, [<<$.>>]) of
                    [SecondsStr, FractionStr] ->
                        Seconds = binary_to_integer(SecondsStr),
                        Nano = binary_to_integer(pax_nano_digits(FractionStr)),
                        (Seconds*?BILLION)+Nano;
                    [SecondsStr] ->
                        binary_to_integer(SecondsStr)*?BILLION
                end,
    %% truncate to seconds
    TotalNano div ?BILLION.

%% Pads (on the right, with zeros) or truncates the fraction digits to
%% exactly ?MAX_NANO_INT_SIZE digits.
pax_nano_digits(Digits) when byte_size(Digits) < ?MAX_NANO_INT_SIZE ->
    Padding = binary:copy(<<$0>>, ?MAX_NANO_INT_SIZE-byte_size(Digits)),
    <<Digits/binary,Padding/binary>>;
pax_nano_digits(Digits) ->
    binary_part(Digits, 0, ?MAX_NANO_INT_SIZE).
1510
%% Given a regular file reader, reads the whole file and parses all
%% extended attributes it contains. Returns {Attributes, Handle}, where
%% Attributes is a map and Handle the underlying reader after the read.
%% An entry without data yields an empty map; read errors are rethrown.
parse_pax(#reg_file_reader{num_bytes=0,handle=Handle}) ->
    {#{}, Handle};
parse_pax(#reg_file_reader{num_bytes=Size,handle=Handle}) ->
    case do_read(Handle, Size) of
        {error, _} = Err ->
            throw(Err);
        {ok, Bytes, Handle1} ->
            do_parse_pax(Handle1, Bytes, #{})
    end.
1522
%% Decodes the records of a PAX header body one at a time, collecting them
%% in a map. If a key occurs more than once, the last value wins.
%% Returns {Attributes, Reader} once the body has been consumed.
do_parse_pax(Reader, <<>>, Attrs) ->
    {Attrs, Reader};
do_parse_pax(Reader, Body, Attrs) ->
    {Name, Value, Remaining} = parse_pax_record(Body),
    do_parse_pax(Reader, Remaining, Attrs#{Name => Value}).
1529
%% Parses the first extended attribute record of a PAX header body.
%%
%% A record has the form "LEN NAME=VALUE\n", where LEN is the decimal size in
%% bytes of the whole record, including the length digits, the separating
%% space and the terminating newline. The record is framed by LEN rather than
%% by searching for the first newline, so a value that itself contains
%% newlines is read in full. The name ends at the first "="; any further "="
%% belongs to the value.
%%
%% Returns {Name, Value, Residual}, where Residual is whatever follows the
%% record. Throws {error, malformed_pax_record} when the length is missing,
%% is not an integer or does not fit the data, when the record is not
%% terminated by a newline, or when the name or the value is empty.
parse_pax_record(Bin) when is_binary(Bin) ->
    case binary:split(Bin, [<<$\s>>]) of
        [LenStr, Rest] ->
            %% LEN also counts its own digits, the space and the newline
            PayloadSize = pax_record_length(LenStr) - byte_size(LenStr) - 2,
            split_pax_record(Rest, PayloadSize);
        _NoLength ->
            throw({error, malformed_pax_record})
    end.

%% Decodes the length prefix of a PAX record.
pax_record_length(LenStr) ->
    try
        binary_to_integer(LenStr)
    catch
        error:badarg ->
            throw({error, malformed_pax_record})
    end.

%% Cuts a "NAME=VALUE" payload of PayloadSize bytes, followed by a newline,
%% off the front of Bin. The shortest valid payload ("N=V") is three bytes.
split_pax_record(Bin, PayloadSize) when PayloadSize >= 3 ->
    case Bin of
        <<Payload:PayloadSize/binary, $\n, Residual/binary>> ->
            case binary:split(Payload, [<<$=>>]) of
                [AttrName, AttrValue] when AttrName =/= <<>>,
                                           AttrValue =/= <<>> ->
                    {AttrName, AttrValue, Residual};
                _Other ->
                    throw({error, malformed_pax_record})
            end;
        _Other ->
            throw({error, malformed_pax_record})
    end;
split_pax_record(_Bin, _PayloadSize) ->
    throw({error, malformed_pax_record}).
1548
%% Reads the remaining bytes of the current entry and returns them as the
%% entry's real name, together with the reader to continue from.
%% For a regular file reader the data is read from (and the result carries)
%% the underlying handle; for a sparse file reader the sparse reader itself
%% is used. An entry with nothing left to read yields the empty string.
%% A read error is thrown as {error, _}.
get_real_name(#reg_file_reader{handle=Handle,num_bytes=0}) ->
    {"", Handle};
get_real_name(#reg_file_reader{handle=Handle,num_bytes=Remaining}) ->
    read_real_name(Handle, Remaining);
get_real_name(#sparse_file_reader{num_bytes=Remaining}=Reader) ->
    read_real_name(Reader, Remaining).

%% Reads Remaining bytes from Source and returns {Data, UpdatedSource}.
read_real_name(Source0, Remaining) ->
    case do_read(Source0, Remaining) of
        {ok, RealName, Source1} ->
            {RealName, Source1};
        {error, _} = Failure ->
            throw(Failure)
    end.
1565
%% Moves past whatever has not yet been consumed of the current file entry.
%% A regular file is skipped by seeking the underlying handle over the unread
%% data and the block padding; a sparse file is skipped by reading and
%% discarding the rest of its contents. Returns the updated file reader and
%% throws whatever the seek or read returned if it did not succeed.
skip_file(#reg_file_reader{}=Reader) ->
    #reg_file_reader{handle=Handle0,pos=Pos,size=Size} = Reader,
    Padding = skip_padding(Size),
    Unread = Size - Pos,
    Target = Handle0#reader.pos + Unread + Padding,
    case do_position(Handle0, Target) of
        {ok, _, Handle1} ->
            Reader#reg_file_reader{handle=Handle1,num_bytes=0,pos=Size};
        Failure ->
            throw(Failure)
    end;
skip_file(#sparse_file_reader{}=Reader) ->
    #sparse_file_reader{pos=Pos,size=Size} = Reader,
    case do_read(Reader, Size - Pos) of
        {ok, _, Reader1} ->
            Reader1;
        Failure ->
            throw(Failure)
    end.
1583
%% Returns the number of padding bytes that follow Size bytes of data so
%% that the total ends on a ?BLOCK_SIZE boundary.
skip_padding(Size) ->
    if
        Size =:= 0 ->
            0;
        (Size rem ?BLOCK_SIZE) =:= 0 ->
            0;
        Size =< ?BLOCK_SIZE ->
            ?BLOCK_SIZE - Size;
        true ->
            ?BLOCK_SIZE - (Size rem ?BLOCK_SIZE)
    end.
1592
%% Positions the underlying reader after the current entry and returns
%% {ok, Reader}.
%% For a bare reader whose position is not on a block boundary, seeks to the
%% next boundary. For a file reader with unread data, the rest of the file is
%% skipped first. A failed seek or read is thrown.
skip_unread(#reader{pos=Pos}=Reader0) when (Pos rem ?BLOCK_SIZE) > 0 ->
    Target = Pos + skip_padding(Pos + ?BLOCK_SIZE),
    case do_position(Reader0, Target) of
        {ok, _, Reader1} ->
            {ok, Reader1};
        Failure ->
            throw(Failure)
    end;
skip_unread(#reader{}=Aligned) ->
    {ok, Aligned};
skip_unread(#reg_file_reader{num_bytes=0}=FileReader) ->
    skip_unread(FileReader#reg_file_reader.handle);
skip_unread(#reg_file_reader{}=FileReader) ->
    #reg_file_reader{handle=Underlying} = skip_file(FileReader),
    {ok, Underlying};
skip_unread(#sparse_file_reader{num_bytes=0}=SparseReader) ->
    skip_unread(SparseReader#sparse_file_reader.handle);
skip_unread(#sparse_file_reader{}=SparseReader) ->
    #sparse_file_reader{handle=Underlying} = skip_file(SparseReader),
    {ok, Underlying}.
1614
%% Materialises one archive entry.
%% With output=memory, a regular file is returned as {ok, {Name, Bin}} and
%% every other entry type yields ok without producing anything.
%% Otherwise the entry is created on disk below the extraction directory;
%% if something was created its file info is applied, and ok is returned
%% when nothing was written.
write_extracted_element(#tar_header{}=Header,
                        Bin,
                        #read_opts{output=memory}=Opts) ->
    case typeflag(Header#tar_header.typeflag) of
        regular ->
            Name = Header#tar_header.name,
            read_verbose(Opts, "x ~ts~n", [Name]),
            {ok, {Name, Bin}};
        _ ->
            ok
    end;
write_extracted_element(#tar_header{name=ArchiveName}=Header, Bin, Opts) ->
    Target = make_safe_path(ArchiveName, Opts),
    Kind = typeflag(Header#tar_header.typeflag),
    case create_extracted_element(Kind, Target, Header, Bin, Opts) of
        ok ->
            set_extracted_file_info(Target, Header);
        not_written ->
            ok
    end.

%% Creates the file system object for an entry of the given kind at Target.
create_extracted_element(regular, Target, #tar_header{name=ArchiveName}, Bin, Opts) ->
    create_regular(Target, ArchiveName, Bin, Opts);
create_extracted_element(directory, Target, #tar_header{name=ArchiveName}, _Bin, Opts) ->
    read_verbose(Opts, "x ~ts~n", [ArchiveName]),
    create_extracted_dir(Target, Opts);
create_extracted_element(symlink, Target, #tar_header{name=ArchiveName}=Header, _Bin, Opts) ->
    read_verbose(Opts, "x ~ts~n", [ArchiveName]),
    LinkName = safe_link_name(Header, Opts),
    create_symlink(Target, LinkName, Opts);
create_extracted_element(Device, Target, #tar_header{name=ArchiveName}, _Bin, Opts)
  when Device =:= char; Device =:= block ->
    %% char/block devices will be created as empty files
    %% and then have their major/minor device set later
    create_regular(Target, ArchiveName, <<>>, Opts);
create_extracted_element(fifo, Target, #tar_header{name=ArchiveName}, _Bin, Opts) ->
    %% fifo devices will be created as empty files
    create_regular(Target, ArchiveName, <<>>, Opts);
create_extracted_element(Other, _Target, #tar_header{name=ArchiveName}, _Bin, Opts) ->
    %% anything else is ignored
    read_verbose(Opts, "x ~ts - unsupported type ~p~n",
                 [ArchiveName, Other]),
    not_written.
1654
%% Turns an entry name into an absolute path below the extraction directory.
%% Leading slashes are stripped first; a name that
%% filelib:safe_relative_path/2 reports as unsafe is rejected by throwing
%% {error, {Path, unsafe_path}}.
make_safe_path([$/|Relative], Opts) ->
    make_safe_path(Relative, Opts);
make_safe_path(Path, #read_opts{cwd=Cwd}) ->
    Safe = filelib:safe_relative_path(Path, Cwd),
    if
        Safe =:= unsafe ->
            throw({error,{Path,unsafe_path}});
        true ->
            filename:absname(Safe, Cwd)
    end.
1662
%% Returns the link target of a header after checking it with
%% filelib:safe_relative_path/2 against the extraction directory.
%% An unsafe target is rejected by throwing {error, {Path, unsafe_symlink}}.
safe_link_name(#tar_header{linkname=Target}, #read_opts{cwd=Cwd}) ->
    Safe = filelib:safe_relative_path(Target, Cwd),
    if
        Safe =:= unsafe ->
            throw({error,{Target,unsafe_symlink}});
        true ->
            Safe
    end.
1668
%% Writes Bin to Name and reports the outcome in verbose mode, using the
%% name the entry has inside the archive. Returns the result of
%% write_extracted_file/3 unchanged.
create_regular(Name, NameInArchive, Bin, Opts) ->
    Result = write_extracted_file(Name, Bin, Opts),
    Format = case Result of
                 not_written -> "x ~ts - exists, not created~n";
                 _ -> "x ~ts~n"
             end,
    read_verbose(Opts, Format, [NameInArchive]),
    Result.
1678
%% Creates the directory Name, creating missing parent directories if needed.
%% Returns ok when the directory was created and not_written when it already
%% exists or directories are not supported. Any other failure, including a
%% failure to create the parent directories, is thrown as {error, Reason} so
%% that the caller only ever sees ok or not_written.
create_extracted_dir(Name, _Opts) ->
    case file:make_dir(Name) of
        ok -> ok;
        {error,enotsup} -> not_written;
        {error,eexist} -> not_written;
        {error,enoent} ->
            %% a parent is missing; make_dirs/2 creates the whole chain,
            %% including Name itself
            case make_dirs(Name, dir) of
                ok -> ok;
                {error,Reason} -> throw({error, Reason})
            end;
        {error,Reason} -> throw({error, Reason})
    end.
1687
%% Creates the symbolic link Name pointing at Linkname.
%% Returns ok when the link was created and not_written when it already
%% exists or symbolic links are not supported. Any other failure is thrown
%% as {error, Reason}.
%%
%% On enoent the directories leading up to Name are created and the link is
%% attempted exactly once more. The retry is bounded because enoent can also
%% persist for reasons creating directories cannot fix (for instance an
%% empty link target), which would otherwise retry forever.
create_symlink(Name, Linkname, Opts) ->
    case file:make_symlink(Linkname, Name) of
        {error,enoent} ->
            case make_dirs(Name, file) of
                ok ->
                    Retry = file:make_symlink(Linkname, Name),
                    symlink_result(Retry, Name, Opts);
                {error,Reason} ->
                    throw({error, Reason})
            end;
        Result ->
            symlink_result(Result, Name, Opts)
    end.

%% Maps the result of file:make_symlink/2 to ok | not_written, or throws.
symlink_result(ok, _Name, _Opts) ->
    ok;
symlink_result({error,eexist}, _Name, _Opts) ->
    not_written;
symlink_result({error,enotsup}, Name, Opts) ->
    read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]),
    not_written;
symlink_result({error,Reason}, _Name, _Opts) ->
    throw({error, Reason}).
1700
%% Writes Bin to Name unless the keep_old_files option is set and
%% file:read_file_info/1 succeeds for Name, in which case nothing is written
%% and not_written is returned.
write_extracted_file(Name, Bin, Opts) ->
    case Opts#read_opts.keep_old_files of
        false ->
            write_file(Name, Bin);
        true ->
            case file:read_file_info(Name) of
                {ok, _} -> not_written;
                _ -> write_file(Name, Bin)
            end
    end.
1715
%% Writes Bin to the file Name, creating missing parent directories if
%% needed. Returns ok; every failure is thrown as {error, Reason}.
%%
%% On enoent the directories leading up to Name are created and the write is
%% attempted exactly once more. The retry is bounded because enoent can
%% persist even though the directories exist (for instance when Name is a
%% dangling symbolic link), which would otherwise retry forever.
write_file(Name, Bin) ->
    case file:write_file(Name, Bin) of
        {error,enoent} ->
            case make_dirs(Name, file) of
                ok ->
                    write_file_result(file:write_file(Name, Bin));
                {error,Reason} ->
                    throw({error, Reason})
            end;
        Result ->
            write_file_result(Result)
    end.

%% Maps the result of file:write_file/2 to ok, or throws the error.
write_file_result(ok) ->
    ok;
write_file_result({error,Reason}) ->
    throw({error, Reason}).
1729
%% Applies the metadata of a header to the file that was just extracted.
%% Links are left untouched, character and block devices are handled by
%% set_device_info/2, and every other entry gets its mode and mtime written
%% with the mtime interpreted as POSIX time.
set_extracted_file_info(_Name, #tar_header{typeflag=Flag})
  when Flag =:= ?TYPE_SYMLINK; Flag =:= ?TYPE_LINK ->
    ok;
set_extracted_file_info(Name, #tar_header{typeflag=Flag}=Header)
  when Flag =:= ?TYPE_CHAR; Flag =:= ?TYPE_BLOCK ->
    set_device_info(Name, Header);
set_extracted_file_info(Name, #tar_header{}=Header) ->
    Info = #file_info{mode=Header#tar_header.mode,
                      mtime=Header#tar_header.mtime},
    file:write_file_info(Name, Info, [{time, posix}]).
1739
%% Writes mode, mtime and the major/minor device numbers of a header to the
%% file Name. Returns the result of file:write_file_info/3.
%%
%% The mtime is passed with {time, posix}, exactly as
%% set_extracted_file_info/2 does for the same header field; without the
%% option the value would be interpreted as a local date-time instead.
set_device_info(Name, #tar_header{}=Header) ->
    Mtime = Header#tar_header.mtime,
    Mode = Header#tar_header.mode,
    Devmajor = Header#tar_header.devmajor,
    Devminor = Header#tar_header.devminor,
    Info = #file_info{
              mode=Mode,
              mtime=Mtime,
              major_device=Devmajor,
              minor_device=Devminor
             },
    file:write_file_info(Name, Info, [{time, posix}]).
1752
%% Makes sure the needed directories exist.
%% For 'file', every directory leading up to the file Name is created.
%% For 'dir', Name itself is created as well, by asking for the parent
%% directories of a placeholder entry inside it.
%% Returns the result of filelib:ensure_dir/1.
make_dirs(Name, Kind) when Kind =:= file; Kind =:= dir ->
    Path = case Kind of
               file -> Name;
               dir -> filename:join(Name,"*")
           end,
    filelib:ensure_dir(Path).
1759
%% Prints the message only if the verbose option is given (for reading).
read_verbose(Opts, Format, Args) ->
    case Opts of
        #read_opts{verbose=true} ->
            io:format(Format, Args);
        _ ->
            ok
    end.
1765
%% Prints the message only if the verbose option is given (for adding).
add_verbose(Opts, Format, Args) ->
    case Opts of
        #add_opts{verbose=true} ->
            io:format(Format, Args);
        _ ->
            ok
    end.
1771
1772%%%%%%%%%%%%%%%%%%
1773%% I/O primitives
1774%%%%%%%%%%%%%%%%%%
1775
%% Writes Data through the reader's access fun.
%% On success the current position is queried and stored in the reader, and
%% {ok, Reader} is returned; an {error, _} from the fun is returned as is.
do_write(#reader{handle=Handle,func=Fun}=Reader0, Data)
  when is_function(Fun,2) ->
    case Fun(write,{Handle,Data}) of
        {error, _} = Failure ->
            Failure;
        ok ->
            {ok, NewPos, Reader1} = do_position(Reader0, {cur,0}),
            {ok, Reader1#reader{pos=NewPos}}
    end.
1785
%% Copies the contents of the file Source into the archive in chunks.
%% A chunk_size of 0 selects the default of 65536 bytes.
%% Returns {ok, BytesCopied, Reader}. The source file is closed in either
%% case, and anything other than success, including a failure to open the
%% source, is thrown.
do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=Requested})
  when is_function(Fun, 2) ->
    ChunkSize = case Requested of
                    0 -> 65536;
                    _ -> Requested
                end,
    case file:open(Source, [read, binary]) of
        {ok, SourceFd} ->
            Result = copy_chunked(Reader, SourceFd, ChunkSize, 0),
            _ = file:close(SourceFd),
            case Result of
                {ok, _Copied, _Reader2} ->
                    Result;
                _ ->
                    throw(Result)
            end;
        Failure ->
            throw(Failure)
    end.
1804
%% Copies Source to the archive ChunkSize bytes at a time.
%% Returns {ok, TotalCopied, Reader} at end of file. A failed read or a
%% failed write is returned to the caller instead of raising a badmatch, so
%% that the caller can close the source file and report the error.
copy_chunked(#reader{}=Reader, Source, ChunkSize, Copied) ->
    case file:read(Source, ChunkSize) of
        {ok, Bin} ->
            case do_write(Reader, Bin) of
                {ok, Reader2} ->
                    copy_chunked(Reader2, Source, ChunkSize,
                                 Copied+byte_size(Bin));
                {error, _} = Err ->
                    Err
            end;
        eof ->
            {ok, Copied, Reader};
        Other ->
            Other
    end.
1815
1816
%% Seeks the reader to Pos through its access fun.
%% Returns {ok, Result, Reader}, where Result is what the fun reported for
%% the seek and the reader's pos field holds the absolute position queried
%% afterwards. Anything else the fun returns is passed through unchanged.
do_position(#reader{handle=Handle,func=Fun}=Reader, Pos)
  when is_function(Fun,2)->
    case Fun(position, {Handle,Pos}) of
        {ok, Result} ->
            %% Pos may be a relative seek, so ask for the absolute position
            %% before recording it in the reader
            {ok, Absolute} = Fun(position, {Handle, {cur, 0}}),
            {ok, Result, Reader#reader{pos=Absolute}};
        Failure ->
            Failure
    end.
1828
%% Reads up to Len bytes and returns {ok, Binary, UpdatedReader}; any other
%% result of the underlying read is passed through unchanged.
%% A regular file reader never reads past the end of its entry, a sparse file
%% reader delegates to do_sparse_read/2, and a bare reader calls its access
%% fun and converts list data to a binary.
do_read(#reg_file_reader{}=Reader, Len) ->
    #reg_file_reader{handle=Handle0,pos=Pos,size=Size} = Reader,
    Remaining = Size - Pos,
    %% clamp the request to what is left of the entry
    ToRead = if Remaining - Len < 0 -> Remaining; true -> Len end,
    case do_read(Handle0, ToRead) of
        {ok, Bin, Handle1} ->
            NewPos = Pos + ToRead,
            {ok, Bin, Reader#reg_file_reader{handle=Handle1,
                                             pos=NewPos,
                                             num_bytes=Size - NewPos}};
        Other ->
            Other
    end;
do_read(#sparse_file_reader{}=Reader, Len) ->
    do_sparse_read(Reader, Len);
do_read(#reader{pos=Pos,handle=Handle,func=Fun}=Reader, Len)
  when is_function(Fun,2)->
    case Fun(read2,{Handle,Len}) of
        {ok, Data} when is_list(Data); is_binary(Data) ->
            %% Always convert to binary internally
            Bin = if is_list(Data) -> list_to_binary(Data); true -> Data end,
            {ok, Bin, Reader#reader{pos=Pos+byte_size(Bin)}};
        Other ->
            Other
    end.
1860
1861
%% Reads up to Len bytes from a sparse file reader, starting from an empty
%% accumulator.
do_sparse_read(Reader, Len) ->
    Acc0 = <<>>,
    do_sparse_read(Reader, Len, Acc0).
1864
%% Reads up to Len bytes of the expanded contents of a sparse file and
%% appends them to Acc. Holes between data fragments are returned as zero
%% bytes; data fragments are read from the underlying handle.
%% Returns {ok, Binary, Reader}, or whatever the underlying read returned if
%% it did not succeed.
%%
%% A read never returns more than Len bytes: a hole is only expanded up to
%% the number of bytes still requested, and the recursion stops as soon as
%% the request is satisfied, also inside the hole at the end of the file.
do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{num_bytes=0}|Entries]
                                  }=Reader0, Len, Acc) ->
    %% skip all empty fragments
    Reader1 = Reader0#sparse_file_reader{sparse_map=Entries},
    do_sparse_read(Reader1, Len, Acc);
do_sparse_read(#sparse_file_reader{}=Reader, 0, Acc) ->
    %% the request is satisfied; this must be checked before the trailing
    %% hole below, which would otherwise keep producing empty reads
    {ok, Acc, Reader};
do_sparse_read(#sparse_file_reader{sparse_map=[],
                                   pos=Pos,size=Size}=Reader0, Len, Acc)
  when Pos < Size ->
    %% if there are no more fragments, it is possible that there is one last sparse hole
    %% this behaviour matches the BSD tar utility
    %% however, GNU tar stops returning data even if we haven't reached the end
    {ok, Bin, Reader1} = read_sparse_hole(Reader0, Size, Len),
    do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
do_sparse_read(#sparse_file_reader{sparse_map=[]}=Reader, _Len, Acc) ->
    {ok, Acc, Reader};
do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{offset=Offset}|_],
                                   pos=Pos}=Reader0, Len, Acc)
  when Pos < Offset ->
    %% we're in a hole in front of the next fragment; read_sparse_hole/3
    %% limits the result to the Len bytes that are still wanted
    {ok, Bin, Reader1} = read_sparse_hole(Reader0, Offset, Len),
    do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
do_sparse_read(#sparse_file_reader{sparse_map=[Entry|Entries],
                                   pos=Pos}=Reader0, Len, Acc) ->
    %% we're in a data fragment, so read from it
    %% end offset of fragment
    EndPos = Entry#sparse_entry.offset + Entry#sparse_entry.num_bytes,
    %% bytes left in fragment
    NumBytes = EndPos - Pos,
    ActualLen = if Len > NumBytes -> NumBytes; true -> Len end,
    case do_read(Reader0#sparse_file_reader.handle, ActualLen) of
        {ok, Bin, Handle} ->
            BytesRead = byte_size(Bin),
            ActualEndPos = Pos+BytesRead,
            %% drop the fragment once it has been read to its end
            Reader1 = if ActualEndPos =:= EndPos ->
                              Reader0#sparse_file_reader{sparse_map=Entries};
                         true ->
                              Reader0
                      end,
            Size = Reader1#sparse_file_reader.size,
            NumBytes2 = Size - ActualEndPos,
            Reader2 = Reader1#sparse_file_reader{
                        handle=Handle,
                        pos=ActualEndPos,
                        num_bytes=NumBytes2},
            do_sparse_read(Reader2, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
        Other ->
            Other
    end.
1914
%% Produces the zero bytes of a sparse hole that ends at Offset, limited to
%% at most Len bytes. Returns {ok, Zeros, Reader} with the reader's position
%% and remaining byte count advanced past the bytes produced.
read_sparse_hole(#sparse_file_reader{pos=Pos}=Reader, Offset, Len) ->
    HoleLen = case Offset - Pos of
                  Gap when Gap > Len -> Len;
                  Gap -> Gap
              end,
    Zeros = <<0:HoleLen/unit:8>>,
    NewPos = Pos + HoleLen,
    Remaining = Reader#sparse_file_reader.size - NewPos,
    {ok, Zeros, Reader#sparse_file_reader{num_bytes=Remaining,
                                          pos=NewPos}}.
1928
%% Closes the reader by invoking its access fun with the close operation and
%% returns the fun's result.
-spec do_close(tar_descriptor()) -> ok | {error, term()}.
do_close(#reader{func=Fun}=Reader) when is_function(Fun,2) ->
    Fun(close, Reader#reader.handle).
1932
1933%%%%%%%%%%%%%%%%%%
1934%% Option parsing
1935%%%%%%%%%%%%%%%%%%
1936
%% Builds the read options for extraction from a user supplied option list,
%% starting from the defaults.
extract_opts(List) ->
    Defaults = default_options(),
    extract_opts(List, Defaults).
1939
%% Builds the read options for listing an archive from a user supplied
%% option list, starting from the defaults.
table_opts(List) ->
    Defaults = default_options(),
    read_opts(List, Defaults).
1942
%% Returns the default read options, with cwd set to the current working
%% directory. Crashes with a badmatch if that directory cannot be determined.
default_options() ->
    {ok, WorkingDir} = file:get_cwd(),
    Defaults = #read_opts{},
    Defaults#read_opts{cwd=WorkingDir}.
1946
%% Folds a list of extraction options into the read options record.
%% Options that are specific to extraction are handled here; every other
%% option is passed on to read_opts/2, which also ignores unknown options.
extract_opts([], Opts) ->
    Opts;
extract_opts([Option|Rest], Opts) ->
    extract_opts(Rest, extract_opt(Option, Opts)).

%% Applies a single extraction option.
extract_opt(keep_old_files, Opts) ->
    Opts#read_opts{keep_old_files=true};
extract_opt({cwd, Cwd}, Opts) ->
    Opts#read_opts{cwd=Cwd};
extract_opt({files, Files}, Opts) ->
    Set = ordsets:from_list(Files),
    Opts#read_opts{files=Set};
extract_opt(memory, Opts) ->
    Opts#read_opts{output=memory};
extract_opt(Other, Opts) ->
    %% compressed, cooked and verbose are shared with table/2
    read_opts([Other], Opts).
1966
%% Folds the options shared by all read operations into the read options
%% record: compressed and cooked are prepended to the open mode, verbose
%% turns on progress output, and anything else is silently ignored.
read_opts([], Opts) ->
    Opts;
read_opts([Option|Rest], Opts) ->
    read_opts(Rest, read_opt(Option, Opts)).

%% Applies a single read option.
read_opt(Mode, #read_opts{open_mode=OpenMode}=Opts)
  when Mode =:= compressed; Mode =:= cooked ->
    Opts#read_opts{open_mode=[Mode|OpenMode]};
read_opt(verbose, Opts) ->
    Opts#read_opts{verbose=true};
read_opt(_Unknown, Opts) ->
    Opts.
1977