1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 1997-2020. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20%% This module implements extraction/creation of tar archives.
21%% It supports reading most common tar formats, namely V7, STAR,
22%% USTAR, GNU, BSD/libarchive, and PAX. It produces archives in USTAR
23%% format, unless it must use PAX headers, in which case it produces PAX
24%% format.
25%%
26%% The following references where used:
27%%   http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
28%%   http://www.gnu.org/software/tar/manual/html_node/Standard.html
29%%   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
30-module(erl_tar).
31
32-export([init/3,
33         create/2, create/3,
34         extract/1, extract/2,
35         table/1, table/2, t/1, tt/1,
36         open/2, close/1,
37         add/3, add/4,
38         format_error/1]).
39
40-include_lib("kernel/include/file.hrl").
41-include_lib("erl_tar.hrl").
42
43%% Converts the short error reason to a descriptive string.
44-spec format_error(term()) -> string().
45format_error(invalid_tar_checksum) ->
46    "Checksum failed";
47format_error(bad_header) ->
48    "Unrecognized tar header format";
49format_error({bad_header, Reason}) ->
50    lists:flatten(io_lib:format("Unrecognized tar header format: ~p", [Reason]));
51format_error({invalid_header, negative_size}) ->
52    "Invalid header: negative size";
53format_error(invalid_sparse_header_size) ->
54    "Invalid sparse header: negative size";
55format_error(invalid_sparse_map_entry) ->
56    "Invalid sparse map entry";
57format_error({invalid_sparse_map_entry, Reason}) ->
58    lists:flatten(io_lib:format("Invalid sparse map entry: ~p", [Reason]));
59format_error(invalid_end_of_archive) ->
60    "Invalid end of archive";
61format_error(eof) ->
62    "Unexpected end of file";
63format_error(integer_overflow) ->
64    "Failed to parse numeric: integer overflow";
65format_error({misaligned_read, Pos}) ->
66    lists:flatten(io_lib:format("Read a block which was misaligned: block_size=~p pos=~p",
67                                [?BLOCK_SIZE, Pos]));
68format_error(invalid_gnu_1_0_sparsemap) ->
69    "Invalid GNU sparse map (version 1.0)";
70format_error({invalid_gnu_0_1_sparsemap, Format}) ->
71    lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format]));
72format_error(unsafe_path) ->
73    "The path points above the current working directory";
74format_error({Name,Reason}) ->
75    lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)]));
76format_error(Atom) when is_atom(Atom) ->
77    file:format_error(Atom);
78format_error(Term) ->
79    lists:flatten(io_lib:format("~tp", [Term])).
80
81%% Initializes a new reader given a custom file handle and I/O wrappers
82-spec init(UserData :: user_data(), write | read, file_op()) ->
83                  {ok, tar_descriptor()} | {error, badarg}.
84init(UserData, AccessMode, Fun) when is_function(Fun, 2) ->
85    Reader = #reader{handle=UserData,access=AccessMode,func=Fun},
86    {ok, Pos, Reader2} = do_position(Reader, {cur, 0}),
87    {ok, Reader2#reader{pos=Pos}};
88init(_UserData, _AccessMode, _Fun) ->
89    {error, badarg}.
90
91%%%================================================================
92%% Extracts all files from the tar file Name.
93-spec extract(Open :: open_type()) -> ok | {error, term()}.
94extract(Name) ->
95    extract(Name, []).
96
97%% Extracts (all) files from the tar file Name.
98%% Options accepted:
99%%  - cooked: Opens the tar file without mode `raw`
100%%  - compressed: Uncompresses the tar file when reading
101%%  - memory: Returns the tar contents as a list of tuples {Name, Bin}
102%%  - keep_old_files: Extracted files will not overwrite the destination
103%%  - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract
104%%  - verbose: Prints verbose information about the extraction,
105%%  - {cwd, AbsoluteDir}: Sets the current working directory for the extraction
106-spec extract(Open :: open_type(), [extract_opt()]) ->
107                     {ok, [{string(), binary()}]} |
108                     {error, term()} |
109                     ok.
110extract({binary, Bin}, Opts) when is_list(Opts) ->
111    do_extract({binary, Bin}, Opts);
112extract({file, Fd}, Opts) when is_list(Opts) ->
113    do_extract({file, Fd}, Opts);
114extract(#reader{}=Reader, Opts) when is_list(Opts) ->
115    do_extract(Reader, Opts);
116extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) ->
117    do_extract(Name, Opts).
118
119do_extract(Handle, Opts) when is_list(Opts) ->
120    Opts2 = extract_opts(Opts),
121    Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end,
122    foldl_read(Handle, fun extract1/4, Acc, Opts2).
123
124extract1(eof, Reader, _, Acc) when is_list(Acc) ->
125    {ok, {ok, lists:reverse(Acc)}, Reader};
126extract1(eof, Reader, _, leading_slash) ->
127    error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
128    {ok, ok, Reader};
129extract1(eof, Reader, _, Acc) ->
130    {ok, Acc, Reader};
131extract1(#tar_header{name=Name,size=Size}=Header, Reader0, Opts, Acc0) ->
132    case check_extract(Name, Opts) of
133        true ->
134            case do_read(Reader0, Size) of
135                {ok, Bin, Reader1} ->
136                    Acc = extract2(Header, Bin, Opts, Acc0),
137                    {ok, Acc, Reader1};
138                {error, _} = Err ->
139                    throw(Err)
140            end;
141        false ->
142            {ok, Acc0, skip_file(Reader0)}
143    end.
144
145extract2(Header, Bin, Opts, Acc) ->
146    case write_extracted_element(Header, Bin, Opts) of
147        ok ->
148            case Header of
149                #tar_header{name="/"++_} ->
150                    leading_slash;
151                #tar_header{} ->
152                    Acc
153            end;
154        {ok, NameBin} when is_list(Acc) ->
155            [NameBin | Acc];
156        {error, _} = Err ->
157            throw(Err)
158    end.
159
160%% Checks if the file Name should be extracted.
161check_extract(_, #read_opts{files=all}) ->
162    true;
163check_extract(Name, #read_opts{files=Files}) ->
164    ordsets:is_element(Name, Files).
165
166%%%================================================================
167%% The following table functions produce a list of information about
168%% the files contained in the archive.
169-type typeflag() :: regular | link | symlink |
170                    char | block | directory |
171                    fifo | reserved | unknown.
172-type mode() :: non_neg_integer().
173-type uid() :: non_neg_integer().
174-type gid() :: non_neg_integer().
175
176-type tar_entry() :: {Name :: name_in_archive(),
177                      Type :: typeflag(),
178                      Size :: non_neg_integer(),
179                      MTime :: tar_time(),
180                      Mode :: mode(),
181                      Uid :: uid(),
182                      Gid :: gid()}.
183
184%% Returns a list of names of the files in the tar file Name.
185-spec table(Open :: open_type()) -> {ok, [name_in_archive()]} | {error, term()}.
186table(Name) ->
187    table(Name, []).
188
189%% Returns a list of names of the files in the tar file Name.
190%% Options accepted: compressed, verbose, cooked.
191-spec table(Open :: open_type(), [compressed | verbose | cooked]) ->
192                   {ok, [name_in_archive() | tar_entry()]} | {error, term()}.
193table(Name, Opts) when is_list(Opts) ->
194    foldl_read(Name, fun table1/4, [], table_opts(Opts)).
195
196table1(eof, Reader, _, Result) ->
197    {ok, {ok, lists:reverse(Result)}, Reader};
198table1(#tar_header{}=Header, Reader, #read_opts{verbose=Verbose}, Result) ->
199    Attrs = table1_attrs(Header, Verbose),
200    Reader2 = skip_file(Reader),
201    {ok, [Attrs|Result], Reader2}.
202
203%% Extracts attributes relevant to table1's output
204table1_attrs(#tar_header{typeflag=Typeflag,mode=Mode}=Header, true) ->
205    Type = typeflag(Typeflag),
206    Name = Header#tar_header.name,
207    Mtime = Header#tar_header.mtime,
208    Uid = Header#tar_header.uid,
209    Gid = Header#tar_header.gid,
210    Size = Header#tar_header.size,
211    {Name, Type, Size, Mtime, Mode, Uid, Gid};
212table1_attrs(#tar_header{name=Name}, _Verbose) ->
213    Name.
214
215typeflag(?TYPE_REGULAR) -> regular;
216typeflag(?TYPE_REGULAR_A) -> regular;
217typeflag(?TYPE_GNU_SPARSE) -> regular;
218typeflag(?TYPE_CONT) -> regular;
219typeflag(?TYPE_LINK) -> link;
220typeflag(?TYPE_SYMLINK) -> symlink;
221typeflag(?TYPE_CHAR) -> char;
222typeflag(?TYPE_BLOCK) -> block;
223typeflag(?TYPE_DIR) -> directory;
224typeflag(?TYPE_FIFO) -> fifo;
225typeflag(_) -> unknown.
226
227%%%================================================================
228%% Comments for printing the contents of a tape archive,
229%% meant to be invoked from the shell.
230
231%% Prints each filename in the archive
232-spec t(file:filename()) -> ok | {error, term()}.
233t(Name) when is_list(Name); is_binary(Name) ->
234    case table(Name) of
235        {ok, List} ->
236            lists:foreach(fun(N) -> ok = io:format("~ts\n", [N]) end, List);
237        Error ->
238            Error
239    end.
240
241%% Prints verbose information about each file in the archive
242-spec tt(open_type()) -> ok | {error, term()}.
243tt(Name) ->
244    case table(Name, [verbose]) of
245        {ok, List} ->
246            lists:foreach(fun print_header/1, List);
247        Error ->
248            Error
249    end.
250
251%% Used by tt/1 to print a tar_entry tuple
252-spec print_header(tar_entry()) -> ok.
253print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) ->
254    io:format("~s~s ~4w/~-4w ~7w ~s ~s\n",
255              [type_to_string(Type), mode_to_string(Mode),
256               Uid, Gid, Size, time_to_string(Mtime), Name]).
257
258type_to_string(regular)   -> "-";
259type_to_string(directory) -> "d";
260type_to_string(link)      -> "l";
261type_to_string(symlink)   -> "s";
262type_to_string(char)      -> "c";
263type_to_string(block)     -> "b";
264type_to_string(fifo)      -> "f";
265type_to_string(unknown)   -> "?".
266
267%% Converts a numeric mode to its human-readable representation
268mode_to_string(Mode) ->
269    mode_to_string(Mode, "xwrxwrxwr", []).
270mode_to_string(Mode, [C|T], Acc) when Mode band 1 =:= 1 ->
271    mode_to_string(Mode bsr 1, T, [C|Acc]);
272mode_to_string(Mode, [_|T], Acc) ->
273    mode_to_string(Mode bsr 1, T, [$-|Acc]);
274mode_to_string(_, [], Acc) ->
275    Acc.
276
277%% Converts a tar_time() (POSIX time) to a readable string
278time_to_string(Secs0) ->
279    Epoch = calendar:datetime_to_gregorian_seconds(?EPOCH),
280    Secs = Epoch + Secs0,
281    DateTime0 = calendar:gregorian_seconds_to_datetime(Secs),
282    DateTime = calendar:universal_time_to_local_time(DateTime0),
283    {{Y, Mon, Day}, {H, Min, _}} = DateTime,
284    io_lib:format("~s ~2w ~s:~s ~w", [month(Mon), Day, two_d(H), two_d(Min), Y]).
285
286two_d(N) ->
287    tl(integer_to_list(N + 100)).
288
289month(1) -> "Jan";
290month(2) -> "Feb";
291month(3) -> "Mar";
292month(4) -> "Apr";
293month(5) -> "May";
294month(6) -> "Jun";
295month(7) -> "Jul";
296month(8) -> "Aug";
297month(9) -> "Sep";
298month(10) -> "Oct";
299month(11) -> "Nov";
300month(12) -> "Dec".
301
302%%%================================================================
303%% The open function with friends is to keep the file and binary api of this module
304-type open_type() :: file:filename_all()
305                     | {binary, binary()}
306                     | {file, file:io_device()}.
307-spec open(Open :: open_type(), [write | compressed | cooked]) ->
308                  {ok, tar_descriptor()} | {error, term()}.
309open({binary, Bin}, Mode) when is_binary(Bin) ->
310    do_open({binary, Bin}, Mode);
311open({file, Fd}, Mode) ->
312    do_open({file, Fd}, Mode);
313open(Name, Mode) when is_list(Name); is_binary(Name) ->
314    do_open(Name, Mode).
315
316do_open(Name, Mode) when is_list(Mode) ->
317    case open_mode(Mode) of
318        {ok, Access, Raw, Opts} ->
319            open1(Name, Access, Raw, Opts);
320        {error, Reason} ->
321            {error, {Name, Reason}}
322    end.
323
324open1({binary,Bin}, read, _Raw, Opts) when is_binary(Bin) ->
325    case file:open(Bin, [ram,binary,read]) of
326        {ok,File} ->
327            _ = [ram_file:uncompress(File) || lists:member(compressed, Opts)],
328            {ok, #reader{handle=File,access=read,func=fun file_op/2}};
329        Error ->
330            Error
331    end;
332open1({file, Fd}, read, _Raw, _Opts) ->
333    Reader = #reader{handle=Fd,access=read,func=fun file_op/2},
334    case do_position(Reader, {cur, 0}) of
335        {ok, Pos, Reader2} ->
336            {ok, Reader2#reader{pos=Pos}};
337        {error, _} = Err ->
338            Err
339    end;
340open1(Name, Access, Raw, Opts) when is_list(Name) or is_binary(Name) ->
341    case file:open(Name, Raw ++ [binary, Access|Opts]) of
342        {ok, File} ->
343            {ok, #reader{handle=File,access=Access,func=fun file_op/2}};
344        {error, Reason} ->
345            {error, {Name, Reason}}
346    end.
347
348open_mode(Mode) ->
349    open_mode(Mode, false, [raw], []).
350
351open_mode(read, _, Raw, _) ->
352    {ok, read, Raw, []};
353open_mode(write, _, Raw, _) ->
354    {ok, write, Raw, []};
355open_mode([read|Rest], false, Raw, Opts) ->
356    open_mode(Rest, read, Raw, Opts);
357open_mode([write|Rest], false, Raw, Opts) ->
358    open_mode(Rest, write, Raw, Opts);
359open_mode([compressed|Rest], Access, Raw, Opts) ->
360    open_mode(Rest, Access, Raw, [compressed,read_ahead|Opts]);
361open_mode([cooked|Rest], Access, _Raw, Opts) ->
362    open_mode(Rest, Access, [], Opts);
363open_mode([], Access, Raw, Opts) ->
364    {ok, Access, Raw, Opts};
365open_mode(_, _, _, _) ->
366    {error, einval}.
367
368file_op(write, {Fd, Data}) ->
369    file:write(Fd, Data);
370file_op(position, {Fd, Pos}) ->
371    file:position(Fd, Pos);
372file_op(read2, {Fd, Size}) ->
373    file:read(Fd, Size);
374file_op(close, Fd) ->
375    file:close(Fd).
376
377%% Closes a tar archive.
378-spec close(TarDescriptor :: tar_descriptor()) -> ok | {error, term()}.
379close(#reader{access=read}=Reader) ->
380    ok = do_close(Reader);
381close(#reader{access=write}=Reader) ->
382    {ok, Reader2} = pad_file(Reader),
383    ok = do_close(Reader2),
384    ok;
385close(_) ->
386    {error, einval}.
387
388pad_file(#reader{pos=Pos}=Reader) ->
389    %% There must be at least two zero blocks at the end.
390    PadCurrent = skip_padding(Pos+?BLOCK_SIZE),
391    Padding = <<0:PadCurrent/unit:8>>,
392    do_write(Reader, [Padding, ?ZERO_BLOCK, ?ZERO_BLOCK]).
393
394
395%%%================================================================
396%% Creation/modification of tar archives
397
398%% Creates a tar file Name containing the given files.
399-spec create(file:filename_all(), filelist()) -> ok | {error, {string(), term()}}.
400create(Name, FileList) when is_list(Name); is_binary(Name) ->
401    create(Name, FileList, []).
402
403%% Creates a tar archive Name containing the given files.
404%% Accepted options: verbose, compressed, cooked
405-spec create(file:filename_all(), filelist(), [create_opt()]) ->
406                    ok | {error, term()} | {error, {string(), term()}}.
407create(Name, FileList, Options) when is_list(Name); is_binary(Name) ->
408    Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked)
409                        end, Options),
410    case open(Name, [write|Mode]) of
411        {ok, TarFile} ->
412            do_create(TarFile, FileList, Options);
413        {error, _} = Err ->
414            Err
415    end.
416
417do_create(TarFile, [], _Opts) ->
418    close(TarFile);
419do_create(TarFile, [{NameInArchive, NameOrBin}|Rest], Opts) ->
420    case add(TarFile, NameOrBin, NameInArchive, Opts) of
421        ok ->
422            do_create(TarFile, Rest, Opts);
423        {error, _} = Err ->
424            _ = close(TarFile),
425            Err
426    end;
427do_create(TarFile, [Name|Rest], Opts) ->
428    case add(TarFile, Name, Name, Opts) of
429        ok ->
430            do_create(TarFile, Rest, Opts);
431        {error, _} = Err ->
432            _ = close(TarFile),
433            Err
434    end.
435
436%% Adds a file to a tape archive.
437-type add_type() :: name_in_archive()
438                  | {name_in_archive(), file:filename_all()}.
439-spec add(TarDescriptor, AddType, Options) -> ok | {error, term()} when
440    TarDescriptor :: tar_descriptor(),
441    AddType :: add_type(),
442    Options :: [add_opt()].
443add(Reader, {NameInArchive, Name}, Opts)
444  when is_list(NameInArchive), is_list(Name) ->
445    do_add(Reader, Name, NameInArchive, Opts);
446add(Reader, {NameInArchive, Bin}, Opts)
447  when is_list(NameInArchive), is_binary(Bin) ->
448    do_add(Reader, Bin, NameInArchive, Opts);
449add(Reader, Name, Opts) when is_list(Name) ->
450    do_add(Reader, Name, Name, Opts).
451
452-spec add(TarDescriptor, Filename, NameInArchive, Options) ->
453        ok | {error, term()} when
454    TarDescriptor :: tar_descriptor(),
455    Filename :: file:filename_all(),
456    NameInArchive :: name_in_archive(),
457    Options :: [add_opt()].
458add(Reader, NameOrBin, NameInArchive, Options)
459  when is_list(NameOrBin); is_binary(NameOrBin),
460       is_list(NameInArchive), is_list(Options) ->
461    do_add(Reader, NameOrBin, NameInArchive, Options).
462
463do_add(#reader{access=write}=Reader, Name, NameInArchive, Options)
464  when is_list(NameInArchive), is_list(Options) ->
465    RF = apply_file_info_opts_fun(Options, read_link_info),
466    Opts = #add_opts{read_info=RF},
467    add1(Reader, Name, NameInArchive, add_opts(Options, Options, Opts));
468do_add(#reader{access=read},_,_,_) ->
469    {error, eacces};
470do_add(Reader,_,_,_) ->
471    {error, {badarg, Reader}}.
472
473add_opts([dereference|T], AllOptions, Opts) ->
474    RF = apply_file_info_opts_fun(AllOptions, read_file_info),
475    add_opts(T, AllOptions, Opts#add_opts{read_info=RF});
476add_opts([verbose|T], AllOptions, Opts) ->
477    add_opts(T, AllOptions, Opts#add_opts{verbose=true});
478add_opts([{chunks,N}|T], AllOptions, Opts) ->
479    add_opts(T, AllOptions, Opts#add_opts{chunk_size=N});
480add_opts([{atime,Value}|T], AllOptions, Opts) ->
481    add_opts(T, AllOptions, Opts#add_opts{atime=Value});
482add_opts([{mtime,Value}|T], AllOptions, Opts) ->
483    add_opts(T, AllOptions, Opts#add_opts{mtime=Value});
484add_opts([{ctime,Value}|T], AllOptions, Opts) ->
485    add_opts(T, AllOptions, Opts#add_opts{ctime=Value});
486add_opts([{uid,Value}|T], AllOptions, Opts) ->
487    add_opts(T, AllOptions, Opts#add_opts{uid=Value});
488add_opts([{gid,Value}|T], AllOptions, Opts) ->
489    add_opts(T, AllOptions, Opts#add_opts{gid=Value});
490add_opts([_|T], AllOptions, Opts) ->
491    add_opts(T, AllOptions, Opts);
492add_opts([], _AllOptions, Opts) ->
493    Opts.
494
495apply_file_info_opts(Opts, {ok, FileInfo}) ->
496    {ok, do_apply_file_info_opts(Opts, FileInfo)};
497apply_file_info_opts(_Opts, Other) ->
498    Other.
499
500do_apply_file_info_opts([{atime,Value}|T], FileInfo) ->
501    do_apply_file_info_opts(T, FileInfo#file_info{atime=Value});
502do_apply_file_info_opts([{mtime,Value}|T], FileInfo) ->
503    do_apply_file_info_opts(T, FileInfo#file_info{mtime=Value});
504do_apply_file_info_opts([{ctime,Value}|T], FileInfo) ->
505    do_apply_file_info_opts(T, FileInfo#file_info{ctime=Value});
506do_apply_file_info_opts([{uid,Value}|T], FileInfo) ->
507    do_apply_file_info_opts(T, FileInfo#file_info{uid=Value});
508do_apply_file_info_opts([{gid,Value}|T], FileInfo) ->
509    do_apply_file_info_opts(T, FileInfo#file_info{gid=Value});
510do_apply_file_info_opts([_|T], FileInfo) ->
511    do_apply_file_info_opts(T, FileInfo);
512do_apply_file_info_opts([], FileInfo) ->
513    FileInfo.
514
515apply_file_info_opts_fun(Options, InfoFunction) ->
516   fun(F) ->
517       apply_file_info_opts(Options, file:InfoFunction(F, [{time, posix}]))
518   end.
519
520add1(#reader{}=Reader, Name, NameInArchive, #add_opts{read_info=ReadInfo}=Opts)
521  when is_list(Name) ->
522    Res = case ReadInfo(Name) of
523              {error, Reason0} ->
524                  {error, {Name, Reason0}};
525              {ok, #file_info{type=symlink}=Fi} ->
526                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
527                  {ok, Linkname} = file:read_link(Name),
528                  Header = fileinfo_to_header(NameInArchive, Fi, Linkname),
529                  add_header(Reader, Header, Opts);
530              {ok, #file_info{type=regular}=Fi} ->
531                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
532                  Header = fileinfo_to_header(NameInArchive, Fi, false),
533                  {ok, Reader2} = add_header(Reader, Header, Opts),
534                  FileSize = Header#tar_header.size,
535                  {ok, FileSize, Reader3} = do_copy(Reader2, Name, Opts),
536                  Padding = skip_padding(FileSize),
537                  Pad = <<0:Padding/unit:8>>,
538                  do_write(Reader3, Pad);
539              {ok, #file_info{type=directory}=Fi} ->
540                  add_directory(Reader, Name, NameInArchive, Fi, Opts);
541              {ok, #file_info{}=Fi} ->
542                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
543                  Header = fileinfo_to_header(NameInArchive, Fi, false),
544                  add_header(Reader, Header, Opts)
545          end,
546    case Res of
547        ok -> ok;
548        {ok, _Reader} -> ok;
549        {error, _Reason} = Err -> Err
550    end;
551add1(Reader, Bin, NameInArchive, Opts) when is_binary(Bin) ->
552    add_verbose(Opts, "a ~ts~n", [NameInArchive]),
553    Now = os:system_time(seconds),
554    Header = #tar_header{
555                name = NameInArchive,
556                size = byte_size(Bin),
557                typeflag = ?TYPE_REGULAR,
558                atime = add_opts_time(Opts#add_opts.atime, Now),
559                mtime = add_opts_time(Opts#add_opts.mtime, Now),
560                ctime = add_opts_time(Opts#add_opts.ctime, Now),
561                uid = Opts#add_opts.uid,
562                gid = Opts#add_opts.gid,
563                mode = 8#100644},
564    {ok, Reader2} = add_header(Reader, Header, Opts),
565    Padding = skip_padding(byte_size(Bin)),
566    Data = [Bin, <<0:Padding/unit:8>>],
567    case do_write(Reader2, Data) of
568        {ok, _Reader3} -> ok;
569        {error, Reason} -> {error, {NameInArchive, Reason}}
570    end.
571
572add_opts_time(undefined, Now) -> Now;
573add_opts_time(Time, _Now) -> Time.
574
575add_directory(Reader, DirName, NameInArchive, Info, Opts) ->
576    case file:list_dir(DirName) of
577        {ok, []} ->
578            add_verbose(Opts, "a ~ts~n", [NameInArchive]),
579            Header = fileinfo_to_header(NameInArchive, Info, false),
580            add_header(Reader, Header, Opts);
581        {ok, Files} ->
582            add_verbose(Opts, "a ~ts~n", [NameInArchive]),
583            try add_files(Reader, Files, DirName, NameInArchive, Opts) of
584                ok -> ok;
585                {error, _} = Err -> Err
586            catch
587                throw:{error, {_Name, _Reason}} = Err -> Err;
588                throw:{error, Reason} -> {error, {DirName, Reason}}
589            end;
590        {error, Reason} ->
591            {error, {DirName, Reason}}
592    end.
593
594add_files(_Reader, [], _Dir, _DirInArchive, _Opts) ->
595    ok;
596add_files(Reader, [Name|Rest], Dir, DirInArchive, #add_opts{read_info=Info}=Opts) ->
597    FullName = filename:join(Dir, Name),
598    NameInArchive = filename:join(DirInArchive, Name),
599    Res = case Info(FullName) of
600              {error, Reason} ->
601                  {error, {FullName, Reason}};
602              {ok, #file_info{type=directory}=Fi} ->
603                  add_directory(Reader, FullName, NameInArchive, Fi, Opts);
604              {ok, #file_info{type=symlink}=Fi} ->
605                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
606                  {ok, Linkname} = file:read_link(FullName),
607                  Header = fileinfo_to_header(NameInArchive, Fi, Linkname),
608                  add_header(Reader, Header, Opts);
609              {ok, #file_info{type=regular}=Fi} ->
610                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
611                  Header = fileinfo_to_header(NameInArchive, Fi, false),
612                  {ok, Reader2} = add_header(Reader, Header, Opts),
613                  FileSize = Header#tar_header.size,
614                  {ok, FileSize, Reader3} = do_copy(Reader2, FullName, Opts),
615                  Padding = skip_padding(FileSize),
616                  Pad = <<0:Padding/unit:8>>,
617                  do_write(Reader3, Pad);
618              {ok, #file_info{}=Fi} ->
619                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
620                  Header = fileinfo_to_header(NameInArchive, Fi, false),
621                  add_header(Reader, Header, Opts)
622          end,
623    case Res of
624        ok -> add_files(Reader, Rest, Dir, DirInArchive, Opts);
625        {ok, ReaderNext} -> add_files(ReaderNext, Rest, Dir, DirInArchive, Opts);
626        {error, _} = Err -> Err
627    end.
628
629format_string(String, Size) when length(String) > Size ->
630    throw({error, {write_string, field_too_long}});
631format_string(String, Size) ->
632    Ascii = to_ascii(String),
633    if byte_size(Ascii) < Size ->
634            [Ascii, 0];
635       true ->
636            Ascii
637    end.
638
639format_octal(Octal) ->
640    iolist_to_binary(io_lib:fwrite("~.8B", [Octal])).
641
642add_header(#reader{}=Reader, #tar_header{}=Header, Opts) ->
643    {ok, Iodata} = build_header(Header, Opts),
644    do_write(Reader, Iodata).
645
646write_to_block(Block, IoData, Start) when is_list(IoData) ->
647    write_to_block(Block, iolist_to_binary(IoData), Start);
648write_to_block(Block, Bin, Start) when is_binary(Bin) ->
649    Size = byte_size(Bin),
650    <<Head:Start/unit:8, _:Size/unit:8, Rest/binary>> = Block,
651    <<Head:Start/unit:8, Bin/binary, Rest/binary>>.
652
653build_header(#tar_header{}=Header, Opts) ->
654    #tar_header{
655       name=Name,
656       mode=Mode,
657       uid=Uid,
658       gid=Gid,
659       size=Size,
660       typeflag=Type,
661       linkname=Linkname,
662       uname=Uname,
663       gname=Gname,
664       devmajor=Devmaj,
665       devminor=Devmin
666      } = Header,
667    Mtime = Header#tar_header.mtime,
668
669    Block0 = ?ZERO_BLOCK,
670    {Block1, Pax0} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, Name, ?PAX_PATH, #{}),
671    Block2 = write_octal(Block1, ?V7_MODE, ?V7_MODE_LEN, Mode),
672    {Block3, Pax1} = write_numeric(Block2, ?V7_UID, ?V7_UID_LEN, Uid, ?PAX_UID, Pax0),
673    {Block4, Pax2} = write_numeric(Block3, ?V7_GID, ?V7_GID_LEN, Gid, ?PAX_GID, Pax1),
674    {Block5, Pax3} = write_numeric(Block4, ?V7_SIZE, ?V7_SIZE_LEN, Size, ?PAX_SIZE, Pax2),
675    {Block6, Pax4} = write_numeric(Block5, ?V7_MTIME, ?V7_MTIME_LEN, Mtime, ?PAX_NONE, Pax3),
676    {Block7, Pax5} = write_string(Block6, ?V7_TYPE, ?V7_TYPE_LEN, <<Type>>, ?PAX_NONE, Pax4),
677    {Block8, Pax6} = write_string(Block7, ?V7_LINKNAME, ?V7_LINKNAME_LEN,
678                                  Linkname, ?PAX_LINKPATH, Pax5),
679    {Block9, Pax7} = write_string(Block8, ?USTAR_UNAME, ?USTAR_UNAME_LEN,
680                                  Uname, ?PAX_UNAME, Pax6),
681    {Block10, Pax8} = write_string(Block9, ?USTAR_GNAME, ?USTAR_GNAME_LEN,
682                                   Gname, ?PAX_GNAME, Pax7),
683    {Block11, Pax9} = write_numeric(Block10, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN,
684                                    Devmaj, ?PAX_NONE, Pax8),
685    {Block12, Pax10} = write_numeric(Block11, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN,
686                                     Devmin, ?PAX_NONE, Pax9),
687    {Block13, Pax11} = set_path(Block12, Pax10),
688    PaxEntry = case maps:size(Pax11) of
689                   0 -> [];
690                   _ -> build_pax_entry(Header, Pax11, Opts)
691               end,
692    Block14 = set_format(Block13, ?FORMAT_USTAR),
693    Block15 = set_checksum(Block14),
694    {ok, [PaxEntry, Block15]}.
695
696set_path(Block0, Pax) ->
697     %% only use ustar header when name is too long
698    case maps:get(?PAX_PATH, Pax, nil) of
699        nil ->
700            {Block0, Pax};
701        PaxPath ->
702            case split_ustar_path(PaxPath) of
703                {ok, UstarName, UstarPrefix} ->
704                    {Block1, _} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN,
705                                               UstarName, ?PAX_NONE, #{}),
706                    {Block2, _} = write_string(Block1, ?USTAR_PREFIX, ?USTAR_PREFIX_LEN,
707                                               UstarPrefix, ?PAX_NONE, #{}),
708                    {Block2, maps:remove(?PAX_PATH, Pax)};
709                false ->
710                    {Block0, Pax}
711            end
712    end.
713
714set_format(Block0, Format)
715  when Format =:= ?FORMAT_USTAR; Format =:= ?FORMAT_PAX ->
716    Block1 = write_to_block(Block0, ?MAGIC_USTAR, ?USTAR_MAGIC),
717    write_to_block(Block1, ?VERSION_USTAR, ?USTAR_VERSION);
718set_format(_Block, Format) ->
719    throw({error, {invalid_format, Format}}).
720
721set_checksum(Block) ->
722    Checksum = compute_checksum(Block),
723    write_octal(Block, ?V7_CHKSUM, ?V7_CHKSUM_LEN, Checksum).
724
725build_pax_entry(Header, PaxAttrs, Opts) ->
726    Path = Header#tar_header.name,
727    Filename = filename:basename(Path),
728    Dir = filename:dirname(Path),
729    Path2 = filename:join([Dir, "PaxHeaders.0", Filename]),
730    AsciiPath = to_ascii(Path2),
731    Path3 = if byte_size(AsciiPath) > ?V7_NAME_LEN ->
732                    binary_part(AsciiPath, 0, ?V7_NAME_LEN - 1);
733               true ->
734                    AsciiPath
735            end,
736    Keys = maps:keys(PaxAttrs),
737    SortedKeys = lists:sort(Keys),
738    PaxFile = build_pax_file(SortedKeys, PaxAttrs),
739    Size = byte_size(PaxFile),
740    Padding = (?BLOCK_SIZE -
741                   (byte_size(PaxFile) rem ?BLOCK_SIZE)) rem ?BLOCK_SIZE,
742    Pad = <<0:Padding/unit:8>>,
743    PaxHeader = #tar_header{
744                   name=unicode:characters_to_list(Path3),
745                   size=Size,
746                   mtime=Header#tar_header.mtime,
747                   atime=Header#tar_header.atime,
748                   ctime=Header#tar_header.ctime,
749                   typeflag=?TYPE_X_HEADER
750                  },
751    {ok, PaxHeaderData} = build_header(PaxHeader, Opts),
752    [PaxHeaderData, PaxFile, Pad].
753
754build_pax_file(Keys, PaxAttrs) ->
755    build_pax_file(Keys, PaxAttrs, []).
756build_pax_file([], _, Acc) ->
757    unicode:characters_to_binary(Acc);
758build_pax_file([K|Rest], Attrs, Acc) ->
759    V = maps:get(K, Attrs),
760    Size = sizeof(K) + sizeof(V) + 3,
761    Size2 = sizeof(Size) + Size,
762    Key = to_string(K),
763    Value = to_string(V),
764    Record = unicode:characters_to_binary(io_lib:format("~B ~ts=~ts\n", [Size2, Key, Value])),
765    if byte_size(Record) =/= Size2 ->
766            Size3 = byte_size(Record),
767            Record2 = io_lib:format("~B ~ts=~ts\n", [Size3, Key, Value]),
768            build_pax_file(Rest, Attrs, [Acc, Record2]);
769       true ->
770            build_pax_file(Rest, Attrs, [Acc, Record])
771    end.
772
773sizeof(Bin) when is_binary(Bin) ->
774    byte_size(Bin);
775sizeof(List) when is_list(List) ->
776    length(List);
777sizeof(N) when is_integer(N) ->
778    byte_size(integer_to_binary(N));
779sizeof(N) when is_float(N) ->
780    byte_size(float_to_binary(N)).
781
782to_string(Bin) when is_binary(Bin) ->
783    unicode:characters_to_list(Bin);
784to_string(List) when is_list(List) ->
785    List;
786to_string(N) when is_integer(N) ->
787    integer_to_list(N);
788to_string(N) when is_float(N) ->
789    float_to_list(N).
790
791split_ustar_path(Path) ->
792    Len = length(Path),
793    NotAscii = not is_ascii(Path),
794    if Len =< ?V7_NAME_LEN; NotAscii ->
795            false;
796       true ->
797            PathBin = binary:list_to_bin(Path),
798            case binary:split(PathBin, [<<$/>>], [global, trim_all]) of
799                [Part] when byte_size(Part) >= ?V7_NAME_LEN ->
800                    false;
801                Parts ->
802                    case lists:last(Parts) of
803                        Name when byte_size(Name) >= ?V7_NAME_LEN ->
804                            false;
805                        Name ->
806                            Parts2 = lists:sublist(Parts, length(Parts) - 1),
807                            join_split_ustar_path(Parts2, {ok, Name, nil})
808                    end
809            end
810    end.
811
812join_split_ustar_path([], Acc) ->
813    Acc;
814join_split_ustar_path([Part|_], {ok, _, nil})
815  when byte_size(Part) > ?USTAR_PREFIX_LEN ->
816    false;
817join_split_ustar_path([Part|_], {ok, _Name, Acc})
818  when (byte_size(Part)+byte_size(Acc)) > ?USTAR_PREFIX_LEN ->
819    false;
820join_split_ustar_path([Part|Rest], {ok, Name, nil}) ->
821    join_split_ustar_path(Rest, {ok, Name, Part});
822join_split_ustar_path([Part|Rest], {ok, Name, Acc}) ->
823    join_split_ustar_path(Rest, {ok, Name, <<Acc/binary,$/,Part/binary>>}).
824
825write_octal(Block, Pos, Size, X) ->
826    Octal = zero_pad(format_octal(X), Size-1),
827    if byte_size(Octal) < Size ->
828            write_to_block(Block, Octal, Pos);
829       true ->
830            throw({error, {write_failed, octal_field_too_long}})
831    end.
832
833write_string(Block, Pos, Size, Str, PaxAttr, Pax0) ->
834    NotAscii = not is_ascii(Str),
835    if PaxAttr =/= ?PAX_NONE andalso (length(Str) > Size orelse NotAscii) ->
836            Pax1 = maps:put(PaxAttr, Str, Pax0),
837            {Block, Pax1};
838       true ->
839            Formatted = format_string(Str, Size),
840            {write_to_block(Block, Formatted, Pos), Pax0}
841    end.
842write_numeric(Block, Pos, Size, X, PaxAttr, Pax0) ->
843    %% attempt octal
844    Octal = zero_pad(format_octal(X), Size-1),
845    if byte_size(Octal) < Size ->
846            {write_to_block(Block, [Octal, 0], Pos), Pax0};
847       PaxAttr =/= ?PAX_NONE ->
848            Pax1 = maps:put(PaxAttr, X, Pax0),
849            {Block, Pax1};
850       true ->
851            throw({error, {write_failed, numeric_field_too_long}})
852    end.
853
854zero_pad(Str, Size) when byte_size(Str) >= Size ->
855    Str;
856zero_pad(Str, Size) ->
857    Padding = Size - byte_size(Str),
858    Pad = binary:copy(<<$0>>, Padding),
859    <<Pad/binary, Str/binary>>.
860
861
862%%%================================================================
863%% Functions for creating or modifying tar archives
864
865read_block(Reader) ->
866    case do_read(Reader, ?BLOCK_SIZE) of
867        eof ->
868            throw({error, eof});
869        %% Two zero blocks mark the end of the archive
870        {ok, ?ZERO_BLOCK, Reader1} ->
871            case do_read(Reader1, ?BLOCK_SIZE) of
872                eof ->
873                    % This is technically a malformed end-of-archive marker,
874                    % as two ZERO_BLOCKs are expected as the marker,
875                    % but if we've already made it this far, we should just ignore it
876                    eof;
877                {ok, ?ZERO_BLOCK, _Reader2} ->
878                    eof;
879                {ok, _Block, _Reader2} ->
880                    throw({error, invalid_end_of_archive});
881                {error,_} = Err ->
882                    throw(Err)
883            end;
884        {ok, Block, Reader1} when is_binary(Block) ->
885            {ok, Block, Reader1};
886        {error, _} = Err ->
887            throw(Err)
888    end.
889
890get_header(#reader{}=Reader) ->
891    case read_block(Reader) of
892        eof ->
893            eof;
894        {ok, Block, Reader1} ->
895            convert_header(Block, Reader1)
896    end.
897
898%% Converts the tar header to a record.
899to_v7(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
900    #header_v7{
901       name=binary_part(Bin, ?V7_NAME, ?V7_NAME_LEN),
902       mode=binary_part(Bin, ?V7_MODE, ?V7_MODE_LEN),
903       uid=binary_part(Bin, ?V7_UID, ?V7_UID_LEN),
904       gid=binary_part(Bin, ?V7_GID, ?V7_GID_LEN),
905       size=binary_part(Bin, ?V7_SIZE, ?V7_SIZE_LEN),
906       mtime=binary_part(Bin, ?V7_MTIME, ?V7_MTIME_LEN),
907       checksum=binary_part(Bin, ?V7_CHKSUM, ?V7_CHKSUM_LEN),
908       typeflag=binary:at(Bin, ?V7_TYPE),
909       linkname=binary_part(Bin, ?V7_LINKNAME, ?V7_LINKNAME_LEN)
910      };
911to_v7(_) ->
912    {error, header_block_too_small}.
913
914to_gnu(#header_v7{}=V7, Bin)
915  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
916    #header_gnu{
917       header_v7=V7,
918       magic=binary_part(Bin, ?GNU_MAGIC, ?GNU_MAGIC_LEN),
919       version=binary_part(Bin, ?GNU_VERSION, ?GNU_VERSION_LEN),
920       uname=binary_part(Bin, 265, 32),
921       gname=binary_part(Bin, 297, 32),
922       devmajor=binary_part(Bin, 329, 8),
923       devminor=binary_part(Bin, 337, 8),
924       atime=binary_part(Bin, 345, 12),
925       ctime=binary_part(Bin, 357, 12),
926       sparse=to_sparse_array(binary_part(Bin, 386, 24*4+1)),
927       real_size=binary_part(Bin, 483, 12)
928      }.
929
930to_star(#header_v7{}=V7, Bin)
931  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
932    #header_star{
933       header_v7=V7,
934       magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
935       version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN),
936       uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN),
937       gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN),
938       devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
939       devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
940       prefix=binary_part(Bin, 345, 131),
941       atime=binary_part(Bin, 476, 12),
942       ctime=binary_part(Bin, 488, 12),
943       trailer=binary_part(Bin, ?STAR_TRAILER, ?STAR_TRAILER_LEN)
944      }.
945
946to_ustar(#header_v7{}=V7, Bin)
947  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
948    #header_ustar{
949       header_v7=V7,
950       magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
951       version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN),
952       uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN),
953       gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN),
954       devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
955       devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
956       prefix=binary_part(Bin, 345, 155)
957      }.
958
959to_sparse_array(Bin) when is_binary(Bin) ->
960    MaxEntries = byte_size(Bin) div 24,
961    IsExtended = 1 =:= binary:at(Bin, 24*MaxEntries),
962    Entries = parse_sparse_entries(Bin, MaxEntries-1, []),
963    #sparse_array{
964       entries=Entries,
965       max_entries=MaxEntries,
966       is_extended=IsExtended
967      }.
968
969parse_sparse_entries(<<>>, _, Acc) ->
970    Acc;
971parse_sparse_entries(_, -1, Acc) ->
972    Acc;
973parse_sparse_entries(Bin, N, Acc) ->
974    case to_sparse_entry(binary_part(Bin, N*24, 24)) of
975        nil ->
976            parse_sparse_entries(Bin, N-1, Acc);
977        Entry = #sparse_entry{} ->
978            parse_sparse_entries(Bin, N-1, [Entry|Acc])
979    end.
980
981-define(EMPTY_ENTRY, <<0,0,0,0,0,0,0,0,0,0,0,0>>).
982to_sparse_entry(Bin) when is_binary(Bin), byte_size(Bin) =:= 24 ->
983    OffsetBin = binary_part(Bin, 0, 12),
984    NumBytesBin = binary_part(Bin, 12, 12),
985    case {OffsetBin, NumBytesBin} of
986        {?EMPTY_ENTRY, ?EMPTY_ENTRY} ->
987            nil;
988        _ ->
989            #sparse_entry{
990               offset=parse_numeric(OffsetBin),
991               num_bytes=parse_numeric(NumBytesBin)}
992    end.
993
994-spec get_format(binary()) -> {ok, pos_integer(), header_v7()}
995                                  | ?FORMAT_UNKNOWN
996                                  | {error, term()}.
997get_format(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
998    do_get_format(to_v7(Bin), Bin).
999
1000do_get_format({error, _} = Err, _Bin) ->
1001    Err;
1002do_get_format(#header_v7{}=V7, Bin)
1003  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
1004    Checksum = parse_octal(V7#header_v7.checksum),
1005    Chk1 = compute_checksum(Bin),
1006    Chk2 = compute_signed_checksum(Bin),
1007    if Checksum =/= Chk1 andalso Checksum =/= Chk2 ->
1008            ?FORMAT_UNKNOWN;
1009       true ->
1010            %% guess magic
1011            Ustar = to_ustar(V7, Bin),
1012            Star = to_star(V7, Bin),
1013            Magic = Ustar#header_ustar.magic,
1014            Version = Ustar#header_ustar.version,
1015            Trailer = Star#header_star.trailer,
1016            Format = if
1017                         Magic =:= ?MAGIC_USTAR, Trailer =:= ?TRAILER_STAR ->
1018                             ?FORMAT_STAR;
1019                         Magic =:= ?MAGIC_USTAR ->
1020                             ?FORMAT_USTAR;
1021                         Magic =:= ?MAGIC_GNU, Version =:= ?VERSION_GNU ->
1022                             ?FORMAT_GNU;
1023                         true ->
1024                             ?FORMAT_V7
1025                     end,
1026            {ok, Format, V7}
1027    end.
1028
1029unpack_format(Format, #header_v7{}=V7, Bin, Reader)
1030  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
1031    Mtime = parse_numeric(V7#header_v7.mtime),
1032    Header0 = #tar_header{
1033                 name=parse_string(V7#header_v7.name),
1034                 mode=parse_numeric(V7#header_v7.mode),
1035                 uid=parse_numeric(V7#header_v7.uid),
1036                 gid=parse_numeric(V7#header_v7.gid),
1037                 size=parse_numeric(V7#header_v7.size),
1038                 mtime=Mtime,
1039                 atime=Mtime,
1040                 ctime=Mtime,
1041                 typeflag=V7#header_v7.typeflag,
1042                 linkname=parse_string(V7#header_v7.linkname)
1043                },
1044    Typeflag = Header0#tar_header.typeflag,
1045    Header1 = if Format > ?FORMAT_V7 ->
1046                      unpack_modern(Format, V7, Bin, Header0);
1047                 true ->
1048                      Name = Header0#tar_header.name,
1049                      Header0#tar_header{name=safe_join_path("", Name)}
1050              end,
1051    HeaderOnly = is_header_only_type(Typeflag),
1052    Header2 = if HeaderOnly ->
1053                      Header1#tar_header{size=0};
1054                 true ->
1055                      Header1
1056              end,
1057    if Typeflag =:= ?TYPE_GNU_SPARSE ->
1058            Gnu = to_gnu(V7, Bin),
1059            RealSize = parse_numeric(Gnu#header_gnu.real_size),
1060            {Sparsemap, Reader2} = parse_sparse_map(Gnu, Reader),
1061            Header3 = Header2#tar_header{size=RealSize},
1062            {Header3, new_sparse_file_reader(Reader2, Sparsemap, RealSize)};
1063       true ->
1064            FileReader = #reg_file_reader{
1065                            handle=Reader,
1066                            num_bytes=Header2#tar_header.size,
1067                            size=Header2#tar_header.size,
1068                            pos = 0
1069                           },
1070            {Header2, FileReader}
1071    end.
1072
1073unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0)
1074  when is_binary(Bin) ->
1075    Typeflag = Header0#tar_header.typeflag,
1076    Ustar = to_ustar(V7, Bin),
1077    H0 = Header0#tar_header{
1078            uname=parse_string(Ustar#header_ustar.uname),
1079            gname=parse_string(Ustar#header_ustar.gname)},
1080    H1 = if Typeflag =:= ?TYPE_CHAR
1081            orelse Typeflag =:= ?TYPE_BLOCK ->
1082                Ma = parse_numeric(Ustar#header_ustar.devmajor),
1083                Mi = parse_numeric(Ustar#header_ustar.devminor),
1084                H0#tar_header{
1085                    devmajor=Ma,
1086                    devminor=Mi
1087                };
1088            true ->
1089                H0
1090        end,
1091    {Prefix, H2} = case Format of
1092                        ?FORMAT_USTAR ->
1093                            {parse_string(Ustar#header_ustar.prefix), H1};
1094                        ?FORMAT_STAR ->
1095                            Star = to_star(V7, Bin),
1096                            Prefix0 = parse_string(Star#header_star.prefix),
1097                            Atime0 = Star#header_star.atime,
1098                            Atime = parse_numeric(Atime0),
1099                            Ctime0 = Star#header_star.ctime,
1100                            Ctime = parse_numeric(Ctime0),
1101                            {Prefix0, H1#tar_header{
1102                                        atime=Atime,
1103                                        ctime=Ctime
1104                                    }};
1105                        _ ->
1106                            {"", H1}
1107                    end,
1108    Name = H2#tar_header.name,
1109    H2#tar_header{name=safe_join_path(Prefix, Name)}.
1110
1111
1112safe_join_path([], Name) ->
1113    filename:join([Name]);
1114safe_join_path(Prefix, []) ->
1115    filename:join([Prefix]);
1116safe_join_path(Prefix, Name) ->
1117    filename:join(Prefix, Name).
1118
1119new_sparse_file_reader(Reader, Sparsemap, RealSize) ->
1120    true = validate_sparse_entries(Sparsemap, RealSize),
1121    #sparse_file_reader{
1122       handle = Reader,
1123       num_bytes = RealSize,
1124       pos = 0,
1125       size = RealSize,
1126       sparse_map = Sparsemap}.
1127
1128validate_sparse_entries(Entries, RealSize) ->
1129    validate_sparse_entries(Entries, RealSize, 0, 0).
1130validate_sparse_entries([], _RealSize, _I, _LastOffset) ->
1131    true;
1132validate_sparse_entries([#sparse_entry{}=Entry|Rest], RealSize, I, LastOffset) ->
1133    Offset = Entry#sparse_entry.offset,
1134    NumBytes = Entry#sparse_entry.num_bytes,
1135    if
1136        Offset > ?MAX_INT64-NumBytes ->
1137            throw({error, {invalid_sparse_map_entry, offset_too_large}});
1138        Offset+NumBytes > RealSize ->
1139            throw({error, {invalid_sparse_map_entry, offset_too_large}});
1140        I > 0 andalso LastOffset > Offset ->
1141            throw({error, {invalid_sparse_map_entry, overlapping_offsets}});
1142        true ->
1143            ok
1144    end,
1145    validate_sparse_entries(Rest, RealSize, I+1, Offset+NumBytes).
1146
1147
1148-spec parse_sparse_map(header_gnu(), descriptor_type()) ->
1149                              {[sparse_entry()], descriptor_type()}.
1150parse_sparse_map(#header_gnu{sparse=Sparse}, Reader)
1151  when Sparse#sparse_array.is_extended ->
1152    parse_sparse_map(Sparse, Reader, []);
1153parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) ->
1154    {Sparse#sparse_array.entries, Reader}.
1155parse_sparse_map(#sparse_array{is_extended=true,entries=Entries}, Reader, Acc) ->
1156    case read_block(Reader) of
1157        eof ->
1158            throw({error, eof});
1159        {ok, Block, Reader2} ->
1160            Sparse2 = to_sparse_array(Block),
1161            parse_sparse_map(Sparse2, Reader2, Entries++Acc)
1162    end;
1163parse_sparse_map(#sparse_array{entries=Entries}, Reader, Acc) ->
1164    Sorted = lists:sort(fun (#sparse_entry{offset=A},#sparse_entry{offset=B}) ->
1165                                A =< B
1166                        end, Entries++Acc),
1167    {Sorted, Reader}.
1168
1169%% Defined by taking the sum of the unsigned byte values of the
1170%% entire header record, treating the checksum bytes to as ASCII spaces
1171compute_checksum(<<H1:?V7_CHKSUM/binary,
1172                   H2:?V7_CHKSUM_LEN/binary,
1173                   Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
1174                   _/binary>>) ->
1175    C0 = checksum(H1) + (byte_size(H2) * $\s),
1176    C1 = checksum(Rest),
1177    C0 + C1.
1178
1179compute_signed_checksum(<<H1:?V7_CHKSUM/binary,
1180                          H2:?V7_CHKSUM_LEN/binary,
1181                          Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
1182                          _/binary>>) ->
1183    C0 = signed_checksum(H1) + (byte_size(H2) * $\s),
1184    C1 = signed_checksum(Rest),
1185    C0 + C1.
1186
1187%% Returns the checksum of a binary.
1188checksum(Bin) -> checksum(Bin, 0).
1189checksum(<<A/unsigned,Rest/binary>>, Sum) ->
1190    checksum(Rest, Sum+A);
1191checksum(<<>>, Sum) -> Sum.
1192
1193signed_checksum(Bin) -> signed_checksum(Bin, 0).
1194signed_checksum(<<A/signed,Rest/binary>>, Sum) ->
1195    signed_checksum(Rest, Sum+A);
1196signed_checksum(<<>>, Sum) -> Sum.
1197
1198-spec parse_numeric(binary()) -> non_neg_integer().
1199parse_numeric(<<>>) ->
1200    0;
1201parse_numeric(<<First, _/binary>> = Bin) ->
1202    %% check for base-256 format first
1203    %% if the bit is set, then all following bits constitute a two's
1204    %% complement encoded number in big-endian byte order
1205    if
1206        First band 16#80 =/= 0 ->
1207            %% Handling negative numbers relies on the following identity:
1208            %%     -a-1 == ^a
1209            %% If the number is negative, we use an inversion mask to invert
1210            %% the data bytes and treat the value as an unsigned number
1211            Inv = if First band 16#40 =/= 0 -> 16#00; true -> 16#FF end,
1212            Bytes = binary:bin_to_list(Bin),
1213            Reducer = fun (C, {I, X}) ->
1214                              C1 = C bxor Inv,
1215                              C2 = if I =:= 0 -> C1 band 16#7F; true -> C1 end,
1216                              if (X bsr 56) > 0 ->
1217                                      throw({error,integer_overflow});
1218                                 true ->
1219                                      {I+1, (X bsl 8) bor C2}
1220                              end
1221                      end,
1222            {_, N} = lists:foldl(Reducer, {0,0}, Bytes),
1223            if (N bsr 63) > 0 ->
1224                    throw({error, integer_overflow});
1225               true ->
1226                    if Inv =:= 16#FF ->
1227                            -1 bxor N;
1228                       true ->
1229                            N
1230                    end
1231            end;
1232        true ->
1233            %% normal case is an octal number
1234            parse_octal(Bin)
1235    end.
1236
1237parse_octal(Bin) when is_binary(Bin) ->
1238    %% skip leading/trailing zero bytes and spaces
1239    do_parse_octal(Bin, <<>>).
1240do_parse_octal(<<>>, <<>>) ->
1241    0;
1242do_parse_octal(<<>>, Acc) ->
1243    case io_lib:fread("~8u", binary:bin_to_list(Acc)) of
1244        {error, _} -> throw({error, invalid_tar_checksum});
1245        {ok, [Octal], []} -> Octal;
1246        {ok, _, _} -> throw({error, invalid_tar_checksum})
1247    end;
1248do_parse_octal(<<$\s,Rest/binary>>, Acc) ->
1249    do_parse_octal(Rest, Acc);
1250do_parse_octal(<<0, Rest/binary>>, Acc) ->
1251    do_parse_octal(Rest, Acc);
1252do_parse_octal(<<C, Rest/binary>>, Acc) ->
1253    do_parse_octal(Rest, <<Acc/binary, C>>).
1254
1255parse_string(Bin) when is_binary(Bin) ->
1256    do_parse_string(Bin, <<>>).
1257do_parse_string(<<>>, Acc) ->
1258    case unicode:characters_to_list(Acc) of
1259        Str when is_list(Str) ->
1260            Str;
1261        {incomplete, _Str, _Rest} ->
1262            binary:bin_to_list(Acc);
1263        {error, _Str, _Rest} ->
1264            throw({error, {bad_header, invalid_string}})
1265    end;
1266do_parse_string(<<0, _/binary>>, Acc) ->
1267    do_parse_string(<<>>, Acc);
1268do_parse_string(<<C, Rest/binary>>, Acc) ->
1269    do_parse_string(Rest, <<Acc/binary, C>>).
1270
1271convert_header(Bin, #reader{pos=Pos}=Reader)
1272  when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 ->
1273    case get_format(Bin) of
1274        ?FORMAT_UNKNOWN ->
1275            throw({error, bad_header});
1276        {ok, Format, V7} ->
1277            unpack_format(Format, V7, Bin, Reader);
1278        {error, Reason} ->
1279            throw({error, {bad_header, Reason}})
1280    end;
1281convert_header(Bin, #reader{pos=Pos}) when byte_size(Bin) =:= ?BLOCK_SIZE ->
1282    throw({error, misaligned_read, Pos});
1283convert_header(Bin, _Reader) when byte_size(Bin) =:= 0 ->
1284    eof;
1285convert_header(_Bin, _Reader) ->
1286    throw({error, eof}).
1287
1288%% Creates a partially-populated header record based
1289%% on the provided file_info record. If the file is
1290%% a symlink, then `link` is used as the link target.
1291%% If the file is a directory, a slash is appended to the name.
1292fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) ->
1293    BaseHeader = #tar_header{name=Name,
1294                             mtime=Fi#file_info.mtime,
1295                             atime=Fi#file_info.atime,
1296                             ctime=Fi#file_info.ctime,
1297                             mode=Fi#file_info.mode,
1298                             uid=Fi#file_info.uid,
1299                             gid=Fi#file_info.gid,
1300                             typeflag=?TYPE_REGULAR},
1301    do_fileinfo_to_header(BaseHeader, Fi, Link).
1302
1303do_fileinfo_to_header(Header, #file_info{size=Size,type=regular}, _Link) ->
1304    Header#tar_header{size=Size,typeflag=?TYPE_REGULAR};
1305do_fileinfo_to_header(#tar_header{name=Name}=Header,
1306                      #file_info{type=directory}, _Link) ->
1307    Header#tar_header{name=Name++"/",typeflag=?TYPE_DIR};
1308do_fileinfo_to_header(Header, #file_info{type=symlink}, Link) ->
1309    Header#tar_header{typeflag=?TYPE_SYMLINK,linkname=Link};
1310do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link)
1311  when (Mode band ?S_IFMT) =:= ?S_IFCHR ->
1312    Header#tar_header{typeflag=?TYPE_CHAR,
1313                      devmajor=Fi#file_info.major_device,
1314                      devminor=Fi#file_info.minor_device};
1315do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link)
1316  when (Mode band ?S_IFMT) =:= ?S_IFBLK ->
1317    Header#tar_header{typeflag=?TYPE_BLOCK,
1318                      devmajor=Fi#file_info.major_device,
1319                      devminor=Fi#file_info.minor_device};
1320do_fileinfo_to_header(Header, #file_info{type=other,mode=Mode}, _Link)
1321  when (Mode band ?S_IFMT) =:= ?S_FIFO ->
1322    Header#tar_header{typeflag=?TYPE_FIFO};
1323do_fileinfo_to_header(Header, Fi, _Link) ->
1324    {error, {invalid_file_type, Header#tar_header.name, Fi}}.
1325
1326is_ascii(Str) when is_list(Str) ->
1327    not lists:any(fun (Char) -> Char >= 16#80 end, Str);
1328is_ascii(Bin) when is_binary(Bin) ->
1329    is_ascii1(Bin).
1330
1331is_ascii1(<<>>) ->
1332    true;
1333is_ascii1(<<C,_Rest/binary>>) when C >= 16#80 ->
1334    false;
1335is_ascii1(<<_, Rest/binary>>) ->
1336    is_ascii1(Rest).
1337
1338to_ascii(Str) when is_list(Str) ->
1339    case is_ascii(Str) of
1340        true ->
1341            unicode:characters_to_binary(Str);
1342        false ->
1343            Chars = lists:filter(fun (Char) -> Char < 16#80 end, Str),
1344            unicode:characters_to_binary(Chars)
1345    end;
1346to_ascii(Bin) when is_binary(Bin) ->
1347    to_ascii(Bin, <<>>).
1348to_ascii(<<>>, Acc) ->
1349    Acc;
1350to_ascii(<<C, Rest/binary>>, Acc) when C < 16#80 ->
1351    to_ascii(Rest, <<Acc/binary,C>>);
1352to_ascii(<<_, Rest/binary>>, Acc) ->
1353    to_ascii(Rest, Acc).
1354
1355is_header_only_type(?TYPE_SYMLINK) -> true;
1356is_header_only_type(?TYPE_LINK)    -> true;
1357is_header_only_type(?TYPE_DIR)     -> true;
1358is_header_only_type(_) -> false.
1359
1360foldl_read(#reader{access=read}=Reader, Fun, Accu, #read_opts{}=Opts)
1361  when is_function(Fun,4) ->
1362    case foldl_read0(Reader, Fun, Accu, Opts) of
1363        {ok, Result, _Reader2} ->
1364            Result;
1365        {error, _} = Err ->
1366            Err
1367    end;
1368foldl_read(#reader{access=Access}, _Fun, _Accu, _Opts) ->
1369    {error, {read_mode_expected, Access}};
1370foldl_read(TarName, Fun, Accu, #read_opts{}=Opts)
1371  when is_function(Fun,4) ->
1372    try open(TarName, [read|Opts#read_opts.open_mode]) of
1373        {ok, #reader{access=read}=Reader} ->
1374            try
1375                foldl_read(Reader, Fun, Accu, Opts)
1376            after
1377                _ = close(Reader)
1378            end;
1379        {error, _} = Err ->
1380            Err
1381    catch
1382        throw:Err ->
1383            Err
1384    end.
1385
1386foldl_read0(Reader, Fun, Accu, Opts) ->
1387    try foldl_read1(Fun, Accu, Reader, Opts, #{}) of
1388        {ok,_,_} = Ok ->
1389            Ok
1390    catch
1391        throw:{error, {Reason, Format, Args}} ->
1392            read_verbose(Opts, Format, Args),
1393            {error, Reason};
1394        throw:Err ->
1395            Err
1396    end.
1397
1398foldl_read1(Fun, Accu0, Reader0, Opts, ExtraHeaders) ->
1399    {ok, Reader1} = skip_unread(Reader0),
1400    case get_header(Reader1) of
1401        eof ->
1402            Fun(eof, Reader1, Opts, Accu0);
1403        {Header, Reader2} ->
1404            case Header#tar_header.typeflag of
1405                ?TYPE_X_HEADER ->
1406                    {ExtraHeaders2, Reader3} = parse_pax(Reader2),
1407                    ExtraHeaders3 = maps:merge(ExtraHeaders, ExtraHeaders2),
1408                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders3);
1409                ?TYPE_GNU_LONGNAME ->
1410                    {RealName, Reader3} = get_real_name(Reader2),
1411                    ExtraHeaders2 = maps:put(?PAX_PATH,
1412                                             parse_string(RealName), ExtraHeaders),
1413                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2);
1414                ?TYPE_GNU_LONGLINK ->
1415                    {RealName, Reader3} = get_real_name(Reader2),
1416                    ExtraHeaders2 = maps:put(?PAX_LINKPATH,
1417                                             parse_string(RealName), ExtraHeaders),
1418                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2);
1419                _ ->
1420                    Header1 = merge_pax(Header, ExtraHeaders),
1421                    {ok, NewAccu, Reader3} = Fun(Header1, Reader2, Opts, Accu0),
1422                    foldl_read1(Fun, NewAccu, Reader3, Opts, #{})
1423            end
1424    end.
1425
1426%% Applies all known PAX attributes to the current tar header
1427-spec merge_pax(tar_header(), #{binary() => binary()}) -> tar_header().
1428merge_pax(Header, ExtraHeaders) when is_map(ExtraHeaders) ->
1429    do_merge_pax(Header, maps:to_list(ExtraHeaders)).
1430
1431do_merge_pax(Header, []) ->
1432    Header;
1433do_merge_pax(Header, [{?PAX_PATH, Path}|Rest]) ->
1434    do_merge_pax(Header#tar_header{name=unicode:characters_to_list(Path)}, Rest);
1435do_merge_pax(Header, [{?PAX_LINKPATH, LinkPath}|Rest]) ->
1436    do_merge_pax(Header#tar_header{linkname=unicode:characters_to_list(LinkPath)}, Rest);
1437do_merge_pax(Header, [{?PAX_GNAME, Gname}|Rest]) ->
1438    do_merge_pax(Header#tar_header{gname=unicode:characters_to_list(Gname)}, Rest);
1439do_merge_pax(Header, [{?PAX_UNAME, Uname}|Rest]) ->
1440    do_merge_pax(Header#tar_header{uname=unicode:characters_to_list(Uname)}, Rest);
1441do_merge_pax(Header, [{?PAX_UID, Uid}|Rest]) ->
1442    Uid2 = binary_to_integer(Uid),
1443    do_merge_pax(Header#tar_header{uid=Uid2}, Rest);
1444do_merge_pax(Header, [{?PAX_GID, Gid}|Rest]) ->
1445    Gid2 = binary_to_integer(Gid),
1446    do_merge_pax(Header#tar_header{gid=Gid2}, Rest);
1447do_merge_pax(Header, [{?PAX_ATIME, Atime}|Rest]) ->
1448    Atime2 = parse_pax_time(Atime),
1449    do_merge_pax(Header#tar_header{atime=Atime2}, Rest);
1450do_merge_pax(Header, [{?PAX_MTIME, Mtime}|Rest]) ->
1451    Mtime2 = parse_pax_time(Mtime),
1452    do_merge_pax(Header#tar_header{mtime=Mtime2}, Rest);
1453do_merge_pax(Header, [{?PAX_CTIME, Ctime}|Rest]) ->
1454    Ctime2 = parse_pax_time(Ctime),
1455    do_merge_pax(Header#tar_header{ctime=Ctime2}, Rest);
1456do_merge_pax(Header, [{?PAX_SIZE, Size}|Rest]) ->
1457    Size2 = binary_to_integer(Size),
1458    do_merge_pax(Header#tar_header{size=Size2}, Rest);
1459do_merge_pax(Header, [{<<?PAX_XATTR_STR, _Key/binary>>, _Value}|Rest]) ->
1460    do_merge_pax(Header, Rest);
1461do_merge_pax(Header, [_Ignore|Rest]) ->
1462    do_merge_pax(Header, Rest).
1463
1464%% Returns the time since UNIX epoch as a datetime
1465-spec parse_pax_time(binary()) -> tar_time().
1466parse_pax_time(Bin) when is_binary(Bin) ->
1467    TotalNano = case binary:split(Bin, [<<$.>>]) of
1468                    [SecondsStr, NanoStr0] ->
1469                        Seconds = binary_to_integer(SecondsStr),
1470                        if byte_size(NanoStr0) < ?MAX_NANO_INT_SIZE ->
1471                                %% right pad
1472                                PaddingN = ?MAX_NANO_INT_SIZE-byte_size(NanoStr0),
1473                                Padding = binary:copy(<<$0>>, PaddingN),
1474                                NanoStr1 = <<NanoStr0/binary,Padding/binary>>,
1475                                Nano = binary_to_integer(NanoStr1),
1476                                (Seconds*?BILLION)+Nano;
1477                           byte_size(NanoStr0) > ?MAX_NANO_INT_SIZE ->
1478                                %% right truncate
1479                                NanoStr1 = binary_part(NanoStr0, 0, ?MAX_NANO_INT_SIZE),
1480                                Nano = binary_to_integer(NanoStr1),
1481                                (Seconds*?BILLION)+Nano;
1482                           true ->
1483                                (Seconds*?BILLION)+binary_to_integer(NanoStr0)
1484                        end;
1485                    [SecondsStr] ->
1486                        binary_to_integer(SecondsStr)*?BILLION
1487                end,
1488    %% truncate to microseconds
1489    Micro = TotalNano div 1000,
1490    Mega = Micro div 1000000000000,
1491    Secs = Micro div 1000000 - (Mega*1000000),
1492    Secs.
1493
1494%% Given a regular file reader, reads the whole file and
1495%% parses all extended attributes it contains.
1496parse_pax(#reg_file_reader{handle=Handle,num_bytes=0}) ->
1497    {#{}, Handle};
1498parse_pax(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
1499    case do_read(Handle0, NumBytes) of
1500        {ok, Bytes, Handle1} ->
1501            do_parse_pax(Handle1, Bytes, #{});
1502        {error, _} = Err ->
1503            throw(Err)
1504    end.
1505
1506do_parse_pax(Reader, <<>>, Headers) ->
1507    {Headers, Reader};
1508do_parse_pax(Reader, Bin, Headers) ->
1509    {Key, Value, Residual} = parse_pax_record(Bin),
1510    NewHeaders = maps:put(Key, Value, Headers),
1511    do_parse_pax(Reader, Residual, NewHeaders).
1512
1513%% Parse an extended attribute
1514parse_pax_record(Bin) when is_binary(Bin) ->
1515    case binary:split(Bin, [<<$\n>>]) of
1516        [Record, Residual] ->
1517            case binary:split(Record, [<<$\s>>], [trim_all]) of
1518                [_Len, Record1] ->
1519                    case binary:split(Record1, [<<$=>>], [trim_all]) of
1520                        [AttrName, AttrValue] ->
1521                            {AttrName, AttrValue, Residual};
1522                        _Other ->
1523                            throw({error, malformed_pax_record})
1524                    end;
1525                _Other ->
1526                    throw({error, malformed_pax_record})
1527            end;
1528        _Other ->
1529            throw({error, malformed_pax_record})
1530    end.
1531
1532get_real_name(#reg_file_reader{handle=Handle,num_bytes=0}) ->
1533    {"", Handle};
1534get_real_name(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
1535    case do_read(Handle0, NumBytes) of
1536        {ok, RealName, Handle1} ->
1537            {RealName, Handle1};
1538        {error, _} = Err ->
1539            throw(Err)
1540    end;
1541get_real_name(#sparse_file_reader{num_bytes=NumBytes}=Reader0) ->
1542    case do_read(Reader0, NumBytes) of
1543        {ok, RealName, Reader1} ->
1544            {RealName, Reader1};
1545        {error, _} = Err ->
1546            throw(Err)
1547    end.
1548
1549%% Skip the remaining bytes for the current file entry
1550skip_file(#reg_file_reader{handle=Handle0,pos=Pos,size=Size}=Reader) ->
1551    Padding = skip_padding(Size),
1552    AbsPos = Handle0#reader.pos + (Size-Pos) + Padding,
1553    case do_position(Handle0, AbsPos) of
1554        {ok, _, Handle1} ->
1555            Reader#reg_file_reader{handle=Handle1,num_bytes=0,pos=Size};
1556        Err ->
1557            throw(Err)
1558    end;
1559skip_file(#sparse_file_reader{pos=Pos,size=Size}=Reader) ->
1560    case do_read(Reader, Size-Pos) of
1561        {ok, _, Reader2} ->
1562            Reader2;
1563        Err ->
1564            throw(Err)
1565    end.
1566
1567skip_padding(0) ->
1568    0;
1569skip_padding(Size) when (Size rem ?BLOCK_SIZE) =:= 0 ->
1570    0;
1571skip_padding(Size) when Size =< ?BLOCK_SIZE ->
1572    ?BLOCK_SIZE - Size;
1573skip_padding(Size) ->
1574    ?BLOCK_SIZE - (Size rem ?BLOCK_SIZE).
1575
1576skip_unread(#reader{pos=Pos}=Reader0) when (Pos rem ?BLOCK_SIZE) > 0 ->
1577    Padding = skip_padding(Pos + ?BLOCK_SIZE),
1578    AbsPos = Pos + Padding,
1579    case do_position(Reader0, AbsPos) of
1580        {ok, _, Reader1} ->
1581            {ok, Reader1};
1582        Err ->
1583            throw(Err)
1584    end;
1585skip_unread(#reader{}=Reader) ->
1586    {ok, Reader};
1587skip_unread(#reg_file_reader{handle=Handle,num_bytes=0}) ->
1588    skip_unread(Handle);
1589skip_unread(#reg_file_reader{}=Reader) ->
1590    #reg_file_reader{handle=Handle} = skip_file(Reader),
1591    {ok, Handle};
1592skip_unread(#sparse_file_reader{handle=Handle,num_bytes=0}) ->
1593    skip_unread(Handle);
1594skip_unread(#sparse_file_reader{}=Reader) ->
1595    #sparse_file_reader{handle=Handle} = skip_file(Reader),
1596    {ok, Handle}.
1597
1598write_extracted_element(#tar_header{name=Name,typeflag=Type},
1599                        Bin,
1600                        #read_opts{output=memory}=Opts) ->
1601    case typeflag(Type) of
1602        regular ->
1603            read_verbose(Opts, "x ~ts~n", [Name]),
1604            {ok, {Name, Bin}};
1605        _ ->
1606            ok
1607    end;
1608write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) ->
1609    Name1 = make_safe_path(Name0, Opts),
1610    Created =
1611        case typeflag(Header#tar_header.typeflag) of
1612            regular ->
1613                create_regular(Name1, Name0, Bin, Opts);
1614            directory ->
1615                read_verbose(Opts, "x ~ts~n", [Name0]),
1616                create_extracted_dir(Name1, Opts);
1617            symlink ->
1618                read_verbose(Opts, "x ~ts~n", [Name0]),
1619                LinkName = safe_link_name(Header, Opts),
1620                create_symlink(Name1, LinkName, Opts);
1621            Device when Device =:= char orelse Device =:= block ->
1622                %% char/block devices will be created as empty files
1623                %% and then have their major/minor device set later
1624                create_regular(Name1, Name0, <<>>, Opts);
1625            fifo ->
1626                %% fifo devices will be created as empty files
1627                create_regular(Name1, Name0, <<>>, Opts);
1628            Other -> % Ignore.
1629                read_verbose(Opts, "x ~ts - unsupported type ~p~n",
1630                             [Name0, Other]),
1631                not_written
1632        end,
1633    case Created of
1634        ok  -> set_extracted_file_info(Name1, Header);
1635        not_written -> ok
1636    end.
1637
1638make_safe_path([$/|Path], Opts) ->
1639    make_safe_path(Path, Opts);
1640make_safe_path(Path, #read_opts{cwd=Cwd}) ->
1641    case filename:safe_relative_path(Path) of
1642        unsafe ->
1643            throw({error,{Path,unsafe_path}});
1644        SafePath ->
1645            filename:absname(SafePath, Cwd)
1646    end.
1647
1648safe_link_name(#tar_header{linkname=Path}, #read_opts{cwd=Cwd}) ->
1649    case safe_relative_path_links(Path, Cwd) of
1650        unsafe ->
1651            throw({error,{Path,unsafe_symlink}});
1652        SafePath ->
1653            SafePath
1654    end.
1655
1656safe_relative_path_links(Path, Cwd) ->
1657    case filename:pathtype(Path) of
1658        relative -> safe_relative_path_links(filename:split(Path), Cwd, [], "");
1659        _ -> unsafe
1660    end.
1661
1662safe_relative_path_links([Segment|Segments], Cwd, PrevSegments, Acc) ->
1663    AccSegment = join(Acc, Segment),
1664    case lists:member(AccSegment, PrevSegments) of
1665        true ->
1666            unsafe;
1667        false ->
1668            case file:read_link(join(Cwd, AccSegment)) of
1669                {ok, LinkPath} ->
1670                    case filename:pathtype(LinkPath) of
1671                        relative ->
1672                            safe_relative_path_links(filename:split(LinkPath) ++ Segments,
1673                                                     Cwd, [AccSegment|PrevSegments], Acc);
1674                        _ ->
1675                            unsafe
1676                    end;
1677
1678                {error, _} ->
1679                    case filename:safe_relative_path(join(Acc, Segment)) of
1680                        unsafe ->
1681                            unsafe;
1682                        NewAcc ->
1683                            safe_relative_path_links(Segments, Cwd,
1684                                                     [AccSegment|PrevSegments], NewAcc)
1685                    end
1686            end
1687    end;
1688safe_relative_path_links([], _Cwd, _PrevSegments, Acc) ->
1689    Acc.
1690
1691join([], Path) -> Path;
1692join(Left, Right) -> filename:join(Left, Right).
1693
1694create_regular(Name, NameInArchive, Bin, Opts) ->
1695    case write_extracted_file(Name, Bin, Opts) of
1696        not_written ->
1697            read_verbose(Opts, "x ~ts - exists, not created~n", [NameInArchive]),
1698            not_written;
1699        Ok ->
1700            read_verbose(Opts, "x ~ts~n", [NameInArchive]),
1701            Ok
1702    end.
1703
1704create_extracted_dir(Name, _Opts) ->
1705    case file:make_dir(Name) of
1706        ok -> ok;
1707        {error,enotsup} -> not_written;
1708        {error,eexist} -> not_written;
1709        {error,enoent} -> make_dirs(Name, dir);
1710        {error,Reason} -> throw({error, Reason})
1711    end.
1712
1713create_symlink(Name, Linkname, Opts) ->
1714    case file:make_symlink(Linkname, Name) of
1715        ok -> ok;
1716        {error,enoent} ->
1717            ok = make_dirs(Name, file),
1718            create_symlink(Name, Linkname, Opts);
1719        {error,eexist} -> not_written;
1720        {error,enotsup} ->
1721            read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]),
1722            not_written;
1723        {error,Reason} -> throw({error, Reason})
1724    end.
1725
1726write_extracted_file(Name, Bin, Opts) ->
1727    Write =
1728        case Opts#read_opts.keep_old_files of
1729            true ->
1730                case file:read_file_info(Name) of
1731                    {ok, _} -> false;
1732                    _ -> true
1733                end;
1734            false -> true
1735        end,
1736    case Write of
1737        true  -> write_file(Name, Bin);
1738        false -> not_written
1739    end.
1740
1741write_file(Name, Bin) ->
1742    case file:write_file(Name, Bin) of
1743        ok -> ok;
1744        {error,enoent} ->
1745            case make_dirs(Name, file) of
1746                ok ->
1747                    write_file(Name, Bin);
1748                {error,Reason} ->
1749                    throw({error, Reason})
1750            end;
1751        {error,Reason} ->
1752            throw({error, Reason})
1753    end.
1754
1755set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_SYMLINK}) -> ok;
1756set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_LINK})    -> ok;
1757set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_CHAR}=Header) ->
1758    set_device_info(Name, Header);
1759set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_BLOCK}=Header) ->
1760    set_device_info(Name, Header);
1761set_extracted_file_info(Name, #tar_header{mtime=Mtime,mode=Mode}) ->
1762    Info = #file_info{mode=Mode, mtime=Mtime},
1763    file:write_file_info(Name, Info, [{time, posix}]).
1764
1765set_device_info(Name, #tar_header{}=Header) ->
1766    Mtime = Header#tar_header.mtime,
1767    Mode = Header#tar_header.mode,
1768    Devmajor = Header#tar_header.devmajor,
1769    Devminor = Header#tar_header.devminor,
1770    Info = #file_info{
1771              mode=Mode,
1772              mtime=Mtime,
1773              major_device=Devmajor,
1774              minor_device=Devminor
1775             },
1776    file:write_file_info(Name, Info).
1777
1778%% Makes all directories leading up to the file.
1779
1780make_dirs(Name, file) ->
1781    filelib:ensure_dir(Name);
1782make_dirs(Name, dir) ->
1783    filelib:ensure_dir(filename:join(Name,"*")).
1784
1785%% Prints the message on if the verbose option is given (for reading).
1786read_verbose(#read_opts{verbose=true}, Format, Args) ->
1787    io:format(Format, Args);
1788read_verbose(_, _, _) ->
1789    ok.
1790
1791%% Prints the message on if the verbose option is given.
1792add_verbose(#add_opts{verbose=true}, Format, Args) ->
1793    io:format(Format, Args);
1794add_verbose(_, _, _) ->
1795    ok.
1796
1797%%%%%%%%%%%%%%%%%%
1798%% I/O primitives
1799%%%%%%%%%%%%%%%%%%
1800
1801do_write(#reader{handle=Handle,func=Fun}=Reader0, Data)
1802  when is_function(Fun,2) ->
1803    case Fun(write,{Handle,Data}) of
1804        ok ->
1805            {ok, Pos, Reader1} = do_position(Reader0, {cur,0}),
1806            {ok, Reader1#reader{pos=Pos}};
1807        {error, _} = Err ->
1808            Err
1809    end.
1810
1811do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=0}=Opts)
1812  when is_function(Fun, 2) ->
1813    do_copy(Reader, Source, Opts#add_opts{chunk_size=65536});
1814do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=ChunkSize})
1815    when is_function(Fun, 2) ->
1816    case file:open(Source, [read, binary]) of
1817        {ok, SourceFd} ->
1818            case copy_chunked(Reader, SourceFd, ChunkSize, 0) of
1819                {ok, _Copied, _Reader2} = Ok->
1820                    _ = file:close(SourceFd),
1821                    Ok;
1822                Err ->
1823                    _ = file:close(SourceFd),
1824                    throw(Err)
1825            end;
1826        Err ->
1827            throw(Err)
1828    end.
1829
1830copy_chunked(#reader{}=Reader, Source, ChunkSize, Copied) ->
1831    case file:read(Source, ChunkSize) of
1832        {ok, Bin} ->
1833            {ok, Reader2} = do_write(Reader, Bin),
1834            copy_chunked(Reader2, Source, ChunkSize, Copied+byte_size(Bin));
1835        eof ->
1836            {ok, Copied, Reader};
1837        Other ->
1838            Other
1839    end.
1840
1841
1842do_position(#reader{handle=Handle,func=Fun}=Reader, Pos)
1843  when is_function(Fun,2)->
1844    case Fun(position, {Handle,Pos}) of
1845        {ok, NewPos} ->
1846            %% since Pos may not always be an absolute seek,
1847            %% make sure we update the reader with the new absolute position
1848            {ok, AbsPos} = Fun(position, {Handle, {cur, 0}}),
1849            {ok, NewPos, Reader#reader{pos=AbsPos}};
1850        Other ->
1851            Other
1852    end.
1853
1854do_read(#reg_file_reader{handle=Handle,pos=Pos,size=Size}=Reader, Len) ->
1855    NumBytes = Size - Pos,
1856    ActualLen = if NumBytes - Len < 0 -> NumBytes; true -> Len end,
1857    case do_read(Handle, ActualLen) of
1858        {ok, Bin, Handle2} ->
1859            NewPos = Pos + ActualLen,
1860            NumBytes2 = Size - NewPos,
1861            Reader1 = Reader#reg_file_reader{
1862                        handle=Handle2,
1863                        pos=NewPos,
1864                        num_bytes=NumBytes2},
1865            {ok, Bin, Reader1};
1866        Other ->
1867            Other
1868    end;
1869do_read(#sparse_file_reader{}=Reader, Len) ->
1870    do_sparse_read(Reader, Len);
1871do_read(#reader{pos=Pos,handle=Handle,func=Fun}=Reader, Len)
1872  when is_function(Fun,2)->
1873    %% Always convert to binary internally
1874    case Fun(read2,{Handle,Len}) of
1875        {ok, List} when is_list(List) ->
1876            Bin = list_to_binary(List),
1877            NewPos = Pos+byte_size(Bin),
1878            {ok, Bin, Reader#reader{pos=NewPos}};
1879        {ok, Bin} when is_binary(Bin) ->
1880            NewPos = Pos+byte_size(Bin),
1881            {ok, Bin, Reader#reader{pos=NewPos}};
1882        Other ->
1883            Other
1884    end.
1885
1886
1887do_sparse_read(Reader, Len) ->
1888    do_sparse_read(Reader, Len, <<>>).
1889
1890do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{num_bytes=0}|Entries]
1891                                  }=Reader0, Len, Acc) ->
1892    %% skip all empty fragments
1893    Reader1 = Reader0#sparse_file_reader{sparse_map=Entries},
1894    do_sparse_read(Reader1, Len, Acc);
1895do_sparse_read(#sparse_file_reader{sparse_map=[],
1896                                   pos=Pos,size=Size}=Reader0, Len, Acc)
1897  when Pos < Size ->
1898    %% if there are no more fragments, it is possible that there is one last sparse hole
1899    %% this behaviour matches the BSD tar utility
1900    %% however, GNU tar stops returning data even if we haven't reached the end
1901    {ok, Bin, Reader1} = read_sparse_hole(Reader0, Size, Len),
1902    do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
1903do_sparse_read(#sparse_file_reader{sparse_map=[]}=Reader, _Len, Acc) ->
1904    {ok, Acc, Reader};
1905do_sparse_read(#sparse_file_reader{}=Reader, 0, Acc) ->
1906    {ok, Acc, Reader};
1907do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{offset=Offset}|_],
1908                                   pos=Pos}=Reader0, Len, Acc)
1909  when Pos < Offset ->
1910    {ok, Bin, Reader1} = read_sparse_hole(Reader0, Offset, Offset-Pos),
1911    do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
1912do_sparse_read(#sparse_file_reader{sparse_map=[Entry|Entries],
1913                                   pos=Pos}=Reader0, Len, Acc) ->
1914    %% we're in a data fragment, so read from it
1915    %% end offset of fragment
1916    EndPos = Entry#sparse_entry.offset + Entry#sparse_entry.num_bytes,
1917    %% bytes left in fragment
1918    NumBytes = EndPos - Pos,
1919    ActualLen = if Len > NumBytes -> NumBytes; true -> Len end,
1920    case do_read(Reader0#sparse_file_reader.handle, ActualLen) of
1921        {ok, Bin, Handle} ->
1922            BytesRead = byte_size(Bin),
1923            ActualEndPos = Pos+BytesRead,
1924            Reader1 = if ActualEndPos =:= EndPos ->
1925                              Reader0#sparse_file_reader{sparse_map=Entries};
1926                         true ->
1927                              Reader0
1928                      end,
1929            Size = Reader1#sparse_file_reader.size,
1930            NumBytes2 = Size - ActualEndPos,
1931            Reader2 = Reader1#sparse_file_reader{
1932                        handle=Handle,
1933                        pos=ActualEndPos,
1934                        num_bytes=NumBytes2},
1935            do_sparse_read(Reader2, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
1936        Other ->
1937            Other
1938    end.
1939
1940%% Reads a sparse hole ending at Offset
1941read_sparse_hole(#sparse_file_reader{pos=Pos}=Reader, Offset, Len) ->
1942    N = Offset - Pos,
1943    N2 = if N > Len ->
1944                 Len;
1945            true ->
1946                 N
1947         end,
1948    Bin = <<0:N2/unit:8>>,
1949    NumBytes = Reader#sparse_file_reader.size - (Pos+N2),
1950    {ok, Bin, Reader#sparse_file_reader{
1951                num_bytes=NumBytes,
1952                pos=Pos+N2}}.
1953
1954-spec do_close(tar_descriptor()) -> ok | {error, term()}.
1955do_close(#reader{handle=Handle,func=Fun}) when is_function(Fun,2) ->
1956    Fun(close,Handle).
1957
1958%%%%%%%%%%%%%%%%%%
1959%% Option parsing
1960%%%%%%%%%%%%%%%%%%
1961
1962extract_opts(List) ->
1963    extract_opts(List, default_options()).
1964
1965table_opts(List) ->
1966    read_opts(List, default_options()).
1967
1968default_options() ->
1969    {ok, Cwd} = file:get_cwd(),
1970    #read_opts{cwd=Cwd}.
1971
1972extract_opts([keep_old_files|Rest], Opts) ->
1973    extract_opts(Rest, Opts#read_opts{keep_old_files=true});
1974extract_opts([{cwd, Cwd}|Rest], Opts) ->
1975    extract_opts(Rest, Opts#read_opts{cwd=Cwd});
1976extract_opts([{files, Files}|Rest], Opts) ->
1977    Set = ordsets:from_list(Files),
1978    extract_opts(Rest, Opts#read_opts{files=Set});
1979extract_opts([memory|Rest], Opts) ->
1980    extract_opts(Rest, Opts#read_opts{output=memory});
1981extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
1982    extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
1983extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
1984    extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
1985extract_opts([verbose|Rest], Opts) ->
1986    extract_opts(Rest, Opts#read_opts{verbose=true});
1987extract_opts([Other|Rest], Opts) ->
1988    extract_opts(Rest, read_opts([Other], Opts));
1989extract_opts([], Opts) ->
1990    Opts.
1991
1992read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
1993    read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
1994read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
1995    read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
1996read_opts([verbose|Rest], Opts) ->
1997    read_opts(Rest, Opts#read_opts{verbose=true});
1998read_opts([_|Rest], Opts) ->
1999    read_opts(Rest, Opts);
2000read_opts([], Opts) ->
2001    Opts.
2002