1%% Vendored from hex_core v0.8.2, do not edit manually 2 3%% @private 4%% Copied from https://github.com/erlang/otp/blob/OTP-20.0.1/lib/stdlib/src/erl_tar.erl 5%% with modifications: 6%% - Change module name to `mix_hex_erl_tar` 7%% - Set tar mtimes to 0 and remove dependency on :os.system_time/1 8%% - Preserve modes when building tarball 9%% - Do not crash if failing to write tar 10%% - Allow setting file_info opts on :mix_hex_erl_tar.add 11%% - Add safe_relative_path_links/2 to check directory traversal vulnerability when extracting files, 12%% it differs from OTP's current fix (2020-02-04) in that it checks regular files instead of 13%% symlink targets. This allows creating symlinks with relative path targets such as `../tmp/log` 14%% - Remove ram_file usage (backported from OTP master) 15 16%% 17%% %CopyrightBegin% 18%% 19%% Copyright Ericsson AB 1997-2017. All Rights Reserved. 20%% 21%% Licensed under the Apache License, Version 2.0 (the "License"); 22%% you may not use this file except in compliance with the License. 23%% You may obtain a copy of the License at 24%% 25%% http://www.apache.org/licenses/LICENSE-2.0 26%% 27%% Unless required by applicable law or agreed to in writing, software 28%% distributed under the License is distributed on an "AS IS" BASIS, 29%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 30%% See the License for the specific language governing permissions and 31%% limitations under the License. 32%% 33%% %CopyrightEnd% 34%% 35%% This module implements extraction/creation of tar archives. 36%% It supports reading most common tar formats, namely V7, STAR, 37%% USTAR, GNU, BSD/libarchive, and PAX. It produces archives in USTAR 38%% format, unless it must use PAX headers, in which case it produces PAX 39%% format. 40%% 41%% The following references where used: 42%% http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 43%% http://www.gnu.org/software/tar/manual/html_node/Standard.html 44%% http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html 45-module(mix_hex_erl_tar). 46 47-export([init/3, 48 create/2, create/3, 49 extract/1, extract/2, 50 table/1, table/2, t/1, tt/1, 51 open/2, close/1, 52 add/3, add/4, add/5, 53 format_error/1]). 54 55-include_lib("kernel/include/file.hrl"). 56-include_lib("mix_hex_erl_tar.hrl"). 57 58%% Converts the short error reason to a descriptive string. 59-spec format_error(term()) -> string(). 60format_error(invalid_tar_checksum) -> 61 "Checksum failed"; 62format_error(bad_header) -> 63 "Unrecognized tar header format"; 64format_error({bad_header, Reason}) -> 65 lists:flatten(io_lib:format("Unrecognized tar header format: ~p", [Reason])); 66format_error({invalid_header, negative_size}) -> 67 "Invalid header: negative size"; 68format_error(invalid_sparse_header_size) -> 69 "Invalid sparse header: negative size"; 70format_error(invalid_sparse_map_entry) -> 71 "Invalid sparse map entry"; 72format_error({invalid_sparse_map_entry, Reason}) -> 73 lists:flatten(io_lib:format("Invalid sparse map entry: ~p", [Reason])); 74format_error(invalid_end_of_archive) -> 75 "Invalid end of archive"; 76format_error(eof) -> 77 "Unexpected end of file"; 78format_error(integer_overflow) -> 79 "Failed to parse numeric: integer overflow"; 80format_error({misaligned_read, Pos}) -> 81 lists:flatten(io_lib:format("Read a block which was misaligned: block_size=~p pos=~p", 82 [?BLOCK_SIZE, Pos])); 83format_error(invalid_gnu_1_0_sparsemap) -> 84 "Invalid GNU sparse map (version 1.0)"; 85format_error({invalid_gnu_0_1_sparsemap, Format}) -> 86 lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format])); 87format_error(unsafe_path) -> 88 "The path points above the current working directory"; 89format_error({Name,Reason}) -> 90 lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)])); 91format_error(Atom) when is_atom(Atom) -> 92 file:format_error(Atom); 93format_error(Term) -> 94 lists:flatten(io_lib:format("~tp", [Term])). 95 96%% Initializes a new reader given a custom file handle and I/O wrappers 97-spec init(handle(), write | read, file_op()) -> {ok, reader()} | {error, badarg}. 98init(Handle, AccessMode, Fun) when is_function(Fun, 2) -> 99 Reader = #reader{handle=Handle,access=AccessMode,func=Fun}, 100 {ok, Pos, Reader2} = do_position(Reader, {cur, 0}), 101 {ok, Reader2#reader{pos=Pos}}; 102init(_Handle, _AccessMode, _Fun) -> 103 {error, badarg}. 104 105%%%================================================================ 106%% Extracts all files from the tar file Name. 107-spec extract(open_handle()) -> ok | {error, term()}. 108extract(Name) -> 109 extract(Name, []). 110 111%% Extracts (all) files from the tar file Name. 112%% Options accepted: 113%% - cooked: Opens the tar file without mode `raw` 114%% - compressed: Uncompresses the tar file when reading 115%% - memory: Returns the tar contents as a list of tuples {Name, Bin} 116%% - keep_old_files: Extracted files will not overwrite the destination 117%% - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract 118%% - verbose: Prints verbose information about the extraction, 119%% - {cwd, AbsoluteDir}: Sets the current working directory for the extraction 120-spec extract(open_handle(), [extract_opt()]) -> 121 ok 122 | {ok, [{string(), binary()}]} 123 | {error, term()}. 124extract({binary, Bin}, Opts) when is_list(Opts) -> 125 do_extract({binary, Bin}, Opts); 126extract({file, Fd}, Opts) when is_list(Opts) -> 127 do_extract({file, Fd}, Opts); 128extract(#reader{}=Reader, Opts) when is_list(Opts) -> 129 do_extract(Reader, Opts); 130extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) -> 131 do_extract(Name, Opts). 132 133do_extract(Handle, Opts) when is_list(Opts) -> 134 Opts2 = extract_opts(Opts), 135 Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end, 136 foldl_read(Handle, fun extract1/4, Acc, Opts2). 137 138extract1(eof, Reader, _, Acc) when is_list(Acc) -> 139 {ok, {ok, lists:reverse(Acc)}, Reader}; 140extract1(eof, Reader, _, leading_slash) -> 141 error_logger:info_msg("erl_tar: removed leading '/' from member names\n"), 142 {ok, ok, Reader}; 143extract1(eof, Reader, _, Acc) -> 144 {ok, Acc, Reader}; 145extract1(#tar_header{name=Name,size=Size}=Header, Reader0, Opts, Acc0) -> 146 case check_extract(Name, Opts) of 147 true -> 148 case do_read(Reader0, Size) of 149 {ok, Bin, Reader1} -> 150 Acc = extract2(Header, Bin, Opts, Acc0), 151 {ok, Acc, Reader1}; 152 {error, _} = Err -> 153 throw(Err) 154 end; 155 false -> 156 {ok, Acc0, skip_file(Reader0)} 157 end. 158 159extract2(Header, Bin, Opts, Acc) -> 160 case write_extracted_element(Header, Bin, Opts) of 161 ok -> 162 case Header of 163 #tar_header{name="/"++_} -> 164 leading_slash; 165 #tar_header{} -> 166 Acc 167 end; 168 {ok, NameBin} when is_list(Acc) -> 169 [NameBin | Acc]; 170 {error, _} = Err -> 171 throw(Err) 172 end. 173 174%% Checks if the file Name should be extracted. 175check_extract(_, #read_opts{files=all}) -> 176 true; 177check_extract(Name, #read_opts{files=Files}) -> 178 ordsets:is_element(Name, Files). 179 180%%%================================================================ 181%% The following table functions produce a list of information about 182%% the files contained in the archive. 183-type filename() :: string(). 184-type typeflag() :: regular | link | symlink | 185 char | block | directory | 186 fifo | reserved | unknown. 187-type mode() :: non_neg_integer(). 188-type uid() :: non_neg_integer(). 189-type gid() :: non_neg_integer(). 190 191-type tar_entry() :: {filename(), 192 typeflag(), 193 non_neg_integer(), 194 tar_time(), 195 mode(), 196 uid(), 197 gid()}. 198 199%% Returns a list of names of the files in the tar file Name. 200-spec table(open_handle()) -> {ok, [string()]} | {error, term()}. 201table(Name) -> 202 table(Name, []). 203 204%% Returns a list of names of the files in the tar file Name. 205%% Options accepted: compressed, verbose, cooked. 206-spec table(open_handle(), [compressed | verbose | cooked]) -> 207 {ok, [tar_entry()]} | {error, term()}. 208table(Name, Opts) when is_list(Opts) -> 209 foldl_read(Name, fun table1/4, [], table_opts(Opts)). 210 211table1(eof, Reader, _, Result) -> 212 {ok, {ok, lists:reverse(Result)}, Reader}; 213table1(#tar_header{}=Header, Reader, #read_opts{verbose=Verbose}, Result) -> 214 Attrs = table1_attrs(Header, Verbose), 215 Reader2 = skip_file(Reader), 216 {ok, [Attrs|Result], Reader2}. 217 218%% Extracts attributes relevant to table1's output 219table1_attrs(#tar_header{typeflag=Typeflag,mode=Mode}=Header, true) -> 220 Type = typeflag(Typeflag), 221 Name = Header#tar_header.name, 222 Mtime = Header#tar_header.mtime, 223 Uid = Header#tar_header.uid, 224 Gid = Header#tar_header.gid, 225 Size = Header#tar_header.size, 226 {Name, Type, Size, Mtime, Mode, Uid, Gid}; 227table1_attrs(#tar_header{name=Name}, _Verbose) -> 228 Name. 229 230typeflag(?TYPE_REGULAR) -> regular; 231typeflag(?TYPE_REGULAR_A) -> regular; 232typeflag(?TYPE_GNU_SPARSE) -> regular; 233typeflag(?TYPE_CONT) -> regular; 234typeflag(?TYPE_LINK) -> link; 235typeflag(?TYPE_SYMLINK) -> symlink; 236typeflag(?TYPE_CHAR) -> char; 237typeflag(?TYPE_BLOCK) -> block; 238typeflag(?TYPE_DIR) -> directory; 239typeflag(?TYPE_FIFO) -> fifo; 240typeflag(_) -> unknown. 241 242%%%================================================================ 243%% Comments for printing the contents of a tape archive, 244%% meant to be invoked from the shell. 245 246%% Prints each filename in the archive 247-spec t(file:filename()) -> ok | {error, term()}. 248t(Name) when is_list(Name); is_binary(Name) -> 249 case table(Name) of 250 {ok, List} -> 251 lists:foreach(fun(N) -> ok = io:format("~ts\n", [N]) end, List); 252 Error -> 253 Error 254 end. 255 256%% Prints verbose information about each file in the archive 257-spec tt(open_handle()) -> ok | {error, term()}. 258tt(Name) -> 259 case table(Name, [verbose]) of 260 {ok, List} -> 261 lists:foreach(fun print_header/1, List); 262 Error -> 263 Error 264 end. 265 266%% Used by tt/1 to print a tar_entry tuple 267-spec print_header(tar_entry()) -> ok. 268print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) -> 269 io:format("~s~s ~4w/~-4w ~7w ~s ~s\n", 270 [type_to_string(Type), mode_to_string(Mode), 271 Uid, Gid, Size, time_to_string(Mtime), Name]). 272 273type_to_string(regular) -> "-"; 274type_to_string(directory) -> "d"; 275type_to_string(link) -> "l"; 276type_to_string(symlink) -> "s"; 277type_to_string(char) -> "c"; 278type_to_string(block) -> "b"; 279type_to_string(fifo) -> "f"; 280type_to_string(unknown) -> "?". 281 282%% Converts a numeric mode to its human-readable representation 283mode_to_string(Mode) -> 284 mode_to_string(Mode, "xwrxwrxwr", []). 285mode_to_string(Mode, [C|T], Acc) when Mode band 1 =:= 1 -> 286 mode_to_string(Mode bsr 1, T, [C|Acc]); 287mode_to_string(Mode, [_|T], Acc) -> 288 mode_to_string(Mode bsr 1, T, [$-|Acc]); 289mode_to_string(_, [], Acc) -> 290 Acc. 291 292%% Converts a tar_time() (POSIX time) to a readable string 293time_to_string(Secs0) -> 294 Epoch = calendar:datetime_to_gregorian_seconds(?EPOCH), 295 Secs = Epoch + Secs0, 296 DateTime0 = calendar:gregorian_seconds_to_datetime(Secs), 297 DateTime = calendar:universal_time_to_local_time(DateTime0), 298 {{Y, Mon, Day}, {H, Min, _}} = DateTime, 299 io_lib:format("~s ~2w ~s:~s ~w", [month(Mon), Day, two_d(H), two_d(Min), Y]). 300 301two_d(N) -> 302 tl(integer_to_list(N + 100)). 303 304month(1) -> "Jan"; 305month(2) -> "Feb"; 306month(3) -> "Mar"; 307month(4) -> "Apr"; 308month(5) -> "May"; 309month(6) -> "Jun"; 310month(7) -> "Jul"; 311month(8) -> "Aug"; 312month(9) -> "Sep"; 313month(10) -> "Oct"; 314month(11) -> "Nov"; 315month(12) -> "Dec". 316 317%%%================================================================ 318%% The open function with friends is to keep the file and binary api of this module 319-type open_handle() :: file:filename() 320 | {binary, binary()} 321 | {file, term()}. 322-spec open(open_handle(), [write | compressed | cooked]) -> 323 {ok, reader()} | {error, term()}. 324open({binary, Bin}, Mode) when is_binary(Bin) -> 325 do_open({binary, Bin}, Mode); 326open({file, Fd}, Mode) -> 327 do_open({file, Fd}, Mode); 328open(Name, Mode) when is_list(Name); is_binary(Name) -> 329 do_open(Name, Mode). 330 331do_open(Name, Mode) when is_list(Mode) -> 332 case open_mode(Mode) of 333 {ok, Access, Raw, Opts} -> 334 open1(Name, Access, Raw, Opts); 335 {error, Reason} -> 336 {error, {Name, Reason}} 337 end. 338 339open1({binary,Bin0}, read, _Raw, Opts) when is_binary(Bin0) -> 340 Bin = case lists:member(compressed, Opts) of 341 true -> 342 try 343 zlib:gunzip(Bin0) 344 catch 345 _:_ -> Bin0 346 end; 347 false -> 348 Bin0 349 end, 350 case file:open(Bin, [ram,binary,read]) of 351 {ok,File} -> 352 {ok, #reader{handle=File,access=read,func=fun file_op/2}}; 353 Error -> 354 Error 355 end; 356open1({file, Fd}, read, _Raw, _Opts) -> 357 Reader = #reader{handle=Fd,access=read,func=fun file_op/2}, 358 case do_position(Reader, {cur, 0}) of 359 {ok, Pos, Reader2} -> 360 {ok, Reader2#reader{pos=Pos}}; 361 {error, _} = Err -> 362 Err 363 end; 364open1(Name, Access, Raw, Opts) when is_list(Name) or is_binary(Name) -> 365 case file:open(Name, Raw ++ [binary, Access|Opts]) of 366 {ok, File} -> 367 {ok, #reader{handle=File,access=Access,func=fun file_op/2}}; 368 {error, Reason} -> 369 {error, {Name, Reason}} 370 end. 371 372open_mode(Mode) -> 373 open_mode(Mode, false, [raw], []). 374 375open_mode(read, _, Raw, _) -> 376 {ok, read, Raw, []}; 377open_mode(write, _, Raw, _) -> 378 {ok, write, Raw, []}; 379open_mode([read|Rest], false, Raw, Opts) -> 380 open_mode(Rest, read, Raw, Opts); 381open_mode([write|Rest], false, Raw, Opts) -> 382 open_mode(Rest, write, Raw, Opts); 383open_mode([compressed|Rest], Access, Raw, Opts) -> 384 open_mode(Rest, Access, Raw, [compressed|Opts]); 385open_mode([cooked|Rest], Access, _Raw, Opts) -> 386 open_mode(Rest, Access, [], Opts); 387open_mode([], Access, Raw, Opts) -> 388 {ok, Access, Raw, Opts}; 389open_mode(_, _, _, _) -> 390 {error, einval}. 391 392file_op(write, {Fd, Data}) -> 393 file:write(Fd, Data); 394file_op(position, {Fd, Pos}) -> 395 file:position(Fd, Pos); 396file_op(read2, {Fd, Size}) -> 397 file:read(Fd, Size); 398file_op(close, Fd) -> 399 file:close(Fd). 400 401%% Closes a tar archive. 402-spec close(reader()) -> ok | {error, term()}. 403close(#reader{access=read}=Reader) -> 404 ok = do_close(Reader); 405close(#reader{access=write}=Reader) -> 406 {ok, Reader2} = pad_file(Reader), 407 ok = do_close(Reader2), 408 ok; 409close(_) -> 410 {error, einval}. 411 412pad_file(#reader{pos=Pos}=Reader) -> 413 %% There must be at least two zero blocks at the end. 414 PadCurrent = skip_padding(Pos+?BLOCK_SIZE), 415 Padding = <<0:PadCurrent/unit:8>>, 416 do_write(Reader, [Padding, ?ZERO_BLOCK, ?ZERO_BLOCK]). 417 418 419%%%================================================================ 420%% Creation/modification of tar archives 421 422%% Creates a tar file Name containing the given files. 423-spec create(file:filename(), filelist()) -> ok | {error, {string(), term()}}. 424create(Name, FileList) when is_list(Name); is_binary(Name) -> 425 create(Name, FileList, []). 426 427%% Creates a tar archive Name containing the given files. 428%% Accepted options: verbose, compressed, cooked 429-spec create(file:filename(), filelist(), [create_opt()]) -> 430 ok | {error, term()} | {error, {string(), term()}}. 431create(Name, FileList, Options) when is_list(Name); is_binary(Name) -> 432 Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked) 433 end, Options), 434 case open(Name, [write|Mode]) of 435 {ok, TarFile} -> 436 do_create(TarFile, FileList, Options); 437 {error, _} = Err -> 438 Err 439 end. 440 441do_create(TarFile, [], _Opts) -> 442 close(TarFile); 443do_create(TarFile, [{NameInArchive, NameOrBin}|Rest], Opts) -> 444 case add(TarFile, NameOrBin, NameInArchive, Opts) of 445 ok -> 446 do_create(TarFile, Rest, Opts); 447 {error, _} = Err -> 448 _ = close(TarFile), 449 Err 450 end; 451do_create(TarFile, [Name|Rest], Opts) -> 452 case add(TarFile, Name, Name, Opts) of 453 ok -> 454 do_create(TarFile, Rest, Opts); 455 {error, _} = Err -> 456 _ = close(TarFile), 457 Err 458 end. 459 460%% Adds a file to a tape archive. 461-type add_type() :: string() 462 | {string(), string()} 463 | {string(), binary()}. 464-spec add(reader(), add_type(), [add_opt()]) -> ok | {error, term()}. 465add(Reader, {NameInArchive, Name}, Opts) 466 when is_list(NameInArchive), is_list(Name) -> 467 do_add(Reader, Name, NameInArchive, undefined, Opts); 468add(Reader, {NameInArchive, Bin}, Opts) 469 when is_list(NameInArchive), is_binary(Bin) -> 470 do_add(Reader, Bin, NameInArchive, undefined, Opts); 471add(Reader, {NameInArchive, Bin, Mode}, Opts) 472 when is_list(NameInArchive), is_binary(Bin), is_integer(Mode) -> 473 do_add(Reader, Bin, NameInArchive, Mode, Opts); 474add(Reader, Name, Opts) when is_list(Name) -> 475 do_add(Reader, Name, Name, undefined, Opts). 476 477 478-spec add(reader(), string() | binary(), string(), [add_opt()]) -> 479 ok | {error, term()}. 480add(Reader, NameOrBin, NameInArchive, Options) 481 when is_list(NameOrBin); is_binary(NameOrBin), 482 is_list(NameInArchive), is_list(Options) -> 483 do_add(Reader, NameOrBin, NameInArchive, undefined, Options). 484 485-spec add(reader(), string() | binary(), string(), integer(), [add_opt()]) -> 486 ok | {error, term()}. 487add(Reader, NameOrBin, NameInArchive, Mode, Options) 488 when is_list(NameOrBin); is_binary(NameOrBin), 489 is_list(NameInArchive), is_integer(Mode), is_list(Options) -> 490 do_add(Reader, NameOrBin, NameInArchive, Mode, Options). 491 492do_add(#reader{access=write}=Reader, Name, NameInArchive, Mode, Options) 493 when is_list(NameInArchive), is_list(Options) -> 494 RF = fun(F) -> apply_file_info_opts(Options, file:read_link_info(F, [{time, posix}])) end, 495 Opts = #add_opts{read_info=RF}, 496 add1(Reader, Name, NameInArchive, Mode, add_opts(Options, Options, Opts)); 497do_add(#reader{access=read},_,_,_,_) -> 498 {error, eacces}; 499do_add(Reader,_,_,_,_) -> 500 {error, {badarg, Reader}}. 501 502add_opts([dereference|T], AllOptions, Opts) -> 503 RF = fun(F) -> apply_file_info_opts(AllOptions, file:read_file_info(F, [{time, posix}])) end, 504 add_opts(T, AllOptions, Opts#add_opts{read_info=RF}); 505add_opts([verbose|T], AllOptions, Opts) -> 506 add_opts(T, AllOptions, Opts#add_opts{verbose=true}); 507add_opts([{chunks,N}|T], AllOptions, Opts) -> 508 add_opts(T, AllOptions, Opts#add_opts{chunk_size=N}); 509add_opts([{atime,Value}|T], AllOptions, Opts) -> 510 add_opts(T, AllOptions, Opts#add_opts{atime=Value}); 511add_opts([{mtime,Value}|T], AllOptions, Opts) -> 512 add_opts(T, AllOptions, Opts#add_opts{mtime=Value}); 513add_opts([{ctime,Value}|T], AllOptions, Opts) -> 514 add_opts(T, AllOptions, Opts#add_opts{ctime=Value}); 515add_opts([{uid,Value}|T], AllOptions, Opts) -> 516 add_opts(T, AllOptions, Opts#add_opts{uid=Value}); 517add_opts([{gid,Value}|T], AllOptions, Opts) -> 518 add_opts(T, AllOptions, Opts#add_opts{gid=Value}); 519add_opts([_|T], AllOptions, Opts) -> 520 add_opts(T, AllOptions, Opts); 521add_opts([], _AllOptions, Opts) -> 522 Opts. 523 524apply_file_info_opts(Opts, {ok, FileInfo}) -> 525 {ok, do_apply_file_info_opts(Opts, FileInfo)}; 526apply_file_info_opts(_Opts, Other) -> 527 Other. 528 529do_apply_file_info_opts([{atime,Value}|T], FileInfo) -> 530 do_apply_file_info_opts(T, FileInfo#file_info{atime=Value}); 531do_apply_file_info_opts([{mtime,Value}|T], FileInfo) -> 532 do_apply_file_info_opts(T, FileInfo#file_info{mtime=Value}); 533do_apply_file_info_opts([{ctime,Value}|T], FileInfo) -> 534 do_apply_file_info_opts(T, FileInfo#file_info{ctime=Value}); 535do_apply_file_info_opts([{uid,Value}|T], FileInfo) -> 536 do_apply_file_info_opts(T, FileInfo#file_info{uid=Value}); 537do_apply_file_info_opts([{gid,Value}|T], FileInfo) -> 538 do_apply_file_info_opts(T, FileInfo#file_info{gid=Value}); 539do_apply_file_info_opts([_|T], FileInfo) -> 540 do_apply_file_info_opts(T, FileInfo); 541do_apply_file_info_opts([], FileInfo) -> 542 FileInfo. 543 544add1(#reader{}=Reader, Name, NameInArchive, undefined, #add_opts{read_info=ReadInfo}=Opts) 545 when is_list(Name) -> 546 Res = case ReadInfo(Name) of 547 {error, Reason0} -> 548 {error, {Name, Reason0}}; 549 {ok, #file_info{type=symlink}=Fi} -> 550 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 551 {ok, Linkname} = file:read_link(Name), 552 Header = fileinfo_to_header(NameInArchive, Fi, Linkname), 553 add_header(Reader, Header, Opts); 554 {ok, #file_info{type=regular}=Fi} -> 555 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 556 Header = fileinfo_to_header(NameInArchive, Fi, false), 557 {ok, Reader2} = add_header(Reader, Header, Opts), 558 FileSize = Header#tar_header.size, 559 {ok, FileSize, Reader3} = do_copy(Reader2, Name, Opts), 560 Padding = skip_padding(FileSize), 561 Pad = <<0:Padding/unit:8>>, 562 do_write(Reader3, Pad); 563 {ok, #file_info{type=directory}=Fi} -> 564 add_directory(Reader, Name, NameInArchive, Fi, Opts); 565 {ok, #file_info{}=Fi} -> 566 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 567 Header = fileinfo_to_header(NameInArchive, Fi, false), 568 add_header(Reader, Header, Opts) 569 end, 570 case Res of 571 ok -> ok; 572 {ok, _Reader} -> ok; 573 {error, _Reason} = Err -> Err 574 end; 575add1(Reader, Bin, NameInArchive, Mode, Opts) when is_binary(Bin) -> 576 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 577 Now = 0, 578 Header = #tar_header{ 579 name = NameInArchive, 580 size = byte_size(Bin), 581 typeflag = ?TYPE_REGULAR, 582 atime = add_opts_time(Opts#add_opts.atime, Now), 583 mtime = add_opts_time(Opts#add_opts.mtime, Now), 584 ctime = add_opts_time(Opts#add_opts.ctime, Now), 585 uid = Opts#add_opts.uid, 586 gid = Opts#add_opts.gid, 587 mode = default_mode(Mode, 8#100644)}, 588 {ok, Reader2} = add_header(Reader, Header, Opts), 589 Padding = skip_padding(byte_size(Bin)), 590 Data = [Bin, <<0:Padding/unit:8>>], 591 case do_write(Reader2, Data) of 592 {ok, _Reader3} -> ok; 593 {error, Reason} -> {error, {NameInArchive, Reason}} 594 end. 595 596add_opts_time(undefined, _Now) -> 0; 597add_opts_time(Time, _Now) -> Time. 598 599default_mode(undefined, Mode) -> Mode; 600default_mode(Mode, _) -> Mode. 601 602add_directory(Reader, DirName, NameInArchive, Info, Opts) -> 603 case file:list_dir(DirName) of 604 {ok, []} -> 605 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 606 Header = fileinfo_to_header(NameInArchive, Info, false), 607 add_header(Reader, Header, Opts); 608 {ok, Files} -> 609 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 610 try add_files(Reader, Files, DirName, NameInArchive, Opts) of 611 ok -> ok; 612 {error, _} = Err -> Err 613 catch 614 throw:{error, {_Name, _Reason}} = Err -> Err; 615 throw:{error, Reason} -> {error, {DirName, Reason}} 616 end; 617 {error, Reason} -> 618 {error, {DirName, Reason}} 619 end. 620 621add_files(_Reader, [], _Dir, _DirInArchive, _Opts) -> 622 ok; 623add_files(Reader, [Name|Rest], Dir, DirInArchive, #add_opts{read_info=Info}=Opts) -> 624 FullName = filename:join(Dir, Name), 625 NameInArchive = filename:join(DirInArchive, Name), 626 Res = case Info(FullName) of 627 {error, Reason} -> 628 {error, {FullName, Reason}}; 629 {ok, #file_info{type=directory}=Fi} -> 630 add_directory(Reader, FullName, NameInArchive, Fi, Opts); 631 {ok, #file_info{type=symlink}=Fi} -> 632 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 633 {ok, Linkname} = file:read_link(FullName), 634 Header = fileinfo_to_header(NameInArchive, Fi, Linkname), 635 add_header(Reader, Header, Opts); 636 {ok, #file_info{type=regular}=Fi} -> 637 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 638 Header = fileinfo_to_header(NameInArchive, Fi, false), 639 {ok, Reader2} = add_header(Reader, Header, Opts), 640 FileSize = Header#tar_header.size, 641 {ok, FileSize, Reader3} = do_copy(Reader2, FullName, Opts), 642 Padding = skip_padding(FileSize), 643 Pad = <<0:Padding/unit:8>>, 644 do_write(Reader3, Pad); 645 {ok, #file_info{}=Fi} -> 646 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 647 Header = fileinfo_to_header(NameInArchive, Fi, false), 648 add_header(Reader, Header, Opts) 649 end, 650 case Res of 651 ok -> add_files(Reader, Rest, Dir, DirInArchive, Opts); 652 {ok, ReaderNext} -> add_files(ReaderNext, Rest, Dir, DirInArchive, Opts); 653 {error, _} = Err -> Err 654 end. 655 656format_string(String, Size) when length(String) > Size -> 657 throw({error, {write_string, field_too_long}}); 658format_string(String, Size) -> 659 Ascii = to_ascii(String), 660 if byte_size(Ascii) < Size -> 661 [Ascii, 0]; 662 true -> 663 Ascii 664 end. 665 666format_octal(Octal) -> 667 iolist_to_binary(io_lib:fwrite("~.8B", [Octal])). 668 669add_header(#reader{}=Reader, #tar_header{}=Header, Opts) -> 670 {ok, Iodata} = build_header(Header, Opts), 671 do_write(Reader, Iodata). 672 673write_to_block(Block, IoData, Start) when is_list(IoData) -> 674 write_to_block(Block, iolist_to_binary(IoData), Start); 675write_to_block(Block, Bin, Start) when is_binary(Bin) -> 676 Size = byte_size(Bin), 677 <<Head:Start/unit:8, _:Size/unit:8, Rest/binary>> = Block, 678 <<Head:Start/unit:8, Bin/binary, Rest/binary>>. 679 680build_header(#tar_header{}=Header, Opts) -> 681 #tar_header{ 682 name=Name, 683 mode=Mode, 684 uid=Uid, 685 gid=Gid, 686 size=Size, 687 typeflag=Type, 688 linkname=Linkname, 689 uname=Uname, 690 gname=Gname, 691 devmajor=Devmaj, 692 devminor=Devmin 693 } = Header, 694 Mtime = Header#tar_header.mtime, 695 696 Block0 = ?ZERO_BLOCK, 697 {Block1, Pax0} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, Name, ?PAX_PATH, #{}), 698 Block2 = write_octal(Block1, ?V7_MODE, ?V7_MODE_LEN, Mode), 699 {Block3, Pax1} = write_numeric(Block2, ?V7_UID, ?V7_UID_LEN, Uid, ?PAX_UID, Pax0), 700 {Block4, Pax2} = write_numeric(Block3, ?V7_GID, ?V7_GID_LEN, Gid, ?PAX_GID, Pax1), 701 {Block5, Pax3} = write_numeric(Block4, ?V7_SIZE, ?V7_SIZE_LEN, Size, ?PAX_SIZE, Pax2), 702 {Block6, Pax4} = write_numeric(Block5, ?V7_MTIME, ?V7_MTIME_LEN, Mtime, ?PAX_NONE, Pax3), 703 {Block7, Pax5} = write_string(Block6, ?V7_TYPE, ?V7_TYPE_LEN, <<Type>>, ?PAX_NONE, Pax4), 704 {Block8, Pax6} = write_string(Block7, ?V7_LINKNAME, ?V7_LINKNAME_LEN, 705 Linkname, ?PAX_LINKPATH, Pax5), 706 {Block9, Pax7} = write_string(Block8, ?USTAR_UNAME, ?USTAR_UNAME_LEN, 707 Uname, ?PAX_UNAME, Pax6), 708 {Block10, Pax8} = write_string(Block9, ?USTAR_GNAME, ?USTAR_GNAME_LEN, 709 Gname, ?PAX_GNAME, Pax7), 710 {Block11, Pax9} = write_numeric(Block10, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN, 711 Devmaj, ?PAX_NONE, Pax8), 712 {Block12, Pax10} = write_numeric(Block11, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN, 713 Devmin, ?PAX_NONE, Pax9), 714 {Block13, Pax11} = set_path(Block12, Pax10), 715 PaxEntry = case maps:size(Pax11) of 716 0 -> []; 717 _ -> build_pax_entry(Header, Pax11, Opts) 718 end, 719 Block14 = set_format(Block13, ?FORMAT_USTAR), 720 Block15 = set_checksum(Block14), 721 {ok, [PaxEntry, Block15]}. 722 723set_path(Block0, Pax) -> 724 %% only use ustar header when name is too long 725 case maps:get(?PAX_PATH, Pax, nil) of 726 nil -> 727 {Block0, Pax}; 728 PaxPath -> 729 case split_ustar_path(PaxPath) of 730 {ok, UstarName, UstarPrefix} -> 731 {Block1, _} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, 732 UstarName, ?PAX_NONE, #{}), 733 {Block2, _} = write_string(Block1, ?USTAR_PREFIX, ?USTAR_PREFIX_LEN, 734 UstarPrefix, ?PAX_NONE, #{}), 735 {Block2, maps:remove(?PAX_PATH, Pax)}; 736 false -> 737 {Block0, Pax} 738 end 739 end. 740 741set_format(Block0, Format) 742 when Format =:= ?FORMAT_USTAR; Format =:= ?FORMAT_PAX -> 743 Block1 = write_to_block(Block0, ?MAGIC_USTAR, ?USTAR_MAGIC), 744 write_to_block(Block1, ?VERSION_USTAR, ?USTAR_VERSION); 745set_format(_Block, Format) -> 746 throw({error, {invalid_format, Format}}). 747 748set_checksum(Block) -> 749 Checksum = compute_checksum(Block), 750 write_octal(Block, ?V7_CHKSUM, ?V7_CHKSUM_LEN, Checksum). 751 752build_pax_entry(Header, PaxAttrs, Opts) -> 753 Path = Header#tar_header.name, 754 Filename = filename:basename(Path), 755 Dir = filename:dirname(Path), 756 Path2 = filename:join([Dir, "PaxHeaders.0", Filename]), 757 AsciiPath = to_ascii(Path2), 758 Path3 = if byte_size(AsciiPath) > ?V7_NAME_LEN -> 759 binary_part(AsciiPath, 0, ?V7_NAME_LEN - 1); 760 true -> 761 AsciiPath 762 end, 763 Keys = maps:keys(PaxAttrs), 764 SortedKeys = lists:sort(Keys), 765 PaxFile = build_pax_file(SortedKeys, PaxAttrs), 766 Size = byte_size(PaxFile), 767 Padding = (?BLOCK_SIZE - 768 (byte_size(PaxFile) rem ?BLOCK_SIZE)) rem ?BLOCK_SIZE, 769 Pad = <<0:Padding/unit:8>>, 770 PaxHeader = #tar_header{ 771 name=unicode:characters_to_list(Path3), 772 size=Size, 773 mtime=Header#tar_header.mtime, 774 atime=Header#tar_header.atime, 775 ctime=Header#tar_header.ctime, 776 typeflag=?TYPE_X_HEADER 777 }, 778 {ok, PaxHeaderData} = build_header(PaxHeader, Opts), 779 [PaxHeaderData, PaxFile, Pad]. 780 781build_pax_file(Keys, PaxAttrs) -> 782 build_pax_file(Keys, PaxAttrs, []). 783build_pax_file([], _, Acc) -> 784 unicode:characters_to_binary(Acc); 785build_pax_file([K|Rest], Attrs, Acc) -> 786 V = maps:get(K, Attrs), 787 Size = sizeof(K) + sizeof(V) + 3, 788 Size2 = sizeof(Size) + Size, 789 Key = to_string(K), 790 Value = to_string(V), 791 Record = unicode:characters_to_binary(io_lib:format("~B ~ts=~ts\n", [Size2, Key, Value])), 792 if byte_size(Record) =/= Size2 -> 793 Size3 = byte_size(Record), 794 Record2 = io_lib:format("~B ~ts=~ts\n", [Size3, Key, Value]), 795 build_pax_file(Rest, Attrs, [Acc, Record2]); 796 true -> 797 build_pax_file(Rest, Attrs, [Acc, Record]) 798 end. 799 800sizeof(Bin) when is_binary(Bin) -> 801 byte_size(Bin); 802sizeof(List) when is_list(List) -> 803 length(List); 804sizeof(N) when is_integer(N) -> 805 byte_size(integer_to_binary(N)); 806sizeof(N) when is_float(N) -> 807 byte_size(float_to_binary(N)). 808 809to_string(Bin) when is_binary(Bin) -> 810 unicode:characters_to_list(Bin); 811to_string(List) when is_list(List) -> 812 List; 813to_string(N) when is_integer(N) -> 814 integer_to_list(N); 815to_string(N) when is_float(N) -> 816 float_to_list(N). 817 818split_ustar_path(Path) -> 819 Len = length(Path), 820 NotAscii = not is_ascii(Path), 821 if Len =< ?V7_NAME_LEN; NotAscii -> 822 false; 823 true -> 824 PathBin = binary:list_to_bin(Path), 825 case binary:split(PathBin, [<<$/>>], [global, trim_all]) of 826 [Part] when byte_size(Part) >= ?V7_NAME_LEN -> 827 false; 828 Parts -> 829 case lists:last(Parts) of 830 Name when byte_size(Name) >= ?V7_NAME_LEN -> 831 false; 832 Name -> 833 Parts2 = lists:sublist(Parts, length(Parts) - 1), 834 join_split_ustar_path(Parts2, {ok, Name, nil}) 835 end 836 end 837 end. 838 839join_split_ustar_path([], Acc) -> 840 Acc; 841join_split_ustar_path([Part|_], {ok, _, nil}) 842 when byte_size(Part) > ?USTAR_PREFIX_LEN -> 843 false; 844join_split_ustar_path([Part|_], {ok, _Name, Acc}) 845 when (byte_size(Part)+byte_size(Acc)) > ?USTAR_PREFIX_LEN -> 846 false; 847join_split_ustar_path([Part|Rest], {ok, Name, nil}) -> 848 join_split_ustar_path(Rest, {ok, Name, Part}); 849join_split_ustar_path([Part|Rest], {ok, Name, Acc}) -> 850 join_split_ustar_path(Rest, {ok, Name, <<Acc/binary,$/,Part/binary>>}). 851 852write_octal(Block, Pos, Size, X) -> 853 Octal = zero_pad(format_octal(X), Size-1), 854 if byte_size(Octal) < Size -> 855 write_to_block(Block, Octal, Pos); 856 true -> 857 throw({error, {write_failed, octal_field_too_long}}) 858 end. 859 860write_string(Block, Pos, Size, Str, PaxAttr, Pax0) -> 861 NotAscii = not is_ascii(Str), 862 if PaxAttr =/= ?PAX_NONE andalso (length(Str) > Size orelse NotAscii) -> 863 Pax1 = maps:put(PaxAttr, Str, Pax0), 864 {Block, Pax1}; 865 true -> 866 Formatted = format_string(Str, Size), 867 {write_to_block(Block, Formatted, Pos), Pax0} 868 end. 869write_numeric(Block, Pos, Size, X, PaxAttr, Pax0) -> 870 %% attempt octal 871 Octal = zero_pad(format_octal(X), Size-1), 872 if byte_size(Octal) < Size -> 873 {write_to_block(Block, [Octal, 0], Pos), Pax0}; 874 PaxAttr =/= ?PAX_NONE -> 875 Pax1 = maps:put(PaxAttr, X, Pax0), 876 {Block, Pax1}; 877 true -> 878 throw({error, {write_failed, numeric_field_too_long}}) 879 end. 880 881zero_pad(Str, Size) when byte_size(Str) >= Size -> 882 Str; 883zero_pad(Str, Size) -> 884 Padding = Size - byte_size(Str), 885 Pad = binary:copy(<<$0>>, Padding), 886 <<Pad/binary, Str/binary>>. 887 888 889%%%================================================================ 890%% Functions for creating or modifying tar archives 891 892read_block(Reader) -> 893 case do_read(Reader, ?BLOCK_SIZE) of 894 eof -> 895 throw({error, eof}); 896 %% Two zero blocks mark the end of the archive 897 {ok, ?ZERO_BLOCK, Reader1} -> 898 case do_read(Reader1, ?BLOCK_SIZE) of 899 eof -> 900 % This is technically a malformed end-of-archive marker, 901 % as two ZERO_BLOCKs are expected as the marker, 902 % but if we've already made it this far, we should just ignore it 903 eof; 904 {ok, ?ZERO_BLOCK, _Reader2} -> 905 eof; 906 {ok, _Block, _Reader2} -> 907 throw({error, invalid_end_of_archive}); 908 {error,_} = Err -> 909 throw(Err) 910 end; 911 {ok, Block, Reader1} when is_binary(Block) -> 912 {ok, Block, Reader1}; 913 {error, _} = Err -> 914 throw(Err) 915 end. 916 917get_header(#reader{}=Reader) -> 918 case read_block(Reader) of 919 eof -> 920 eof; 921 {ok, Block, Reader1} -> 922 convert_header(Block, Reader1) 923 end. 924 925%% Converts the tar header to a record. 926to_v7(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 927 #header_v7{ 928 name=binary_part(Bin, ?V7_NAME, ?V7_NAME_LEN), 929 mode=binary_part(Bin, ?V7_MODE, ?V7_MODE_LEN), 930 uid=binary_part(Bin, ?V7_UID, ?V7_UID_LEN), 931 gid=binary_part(Bin, ?V7_GID, ?V7_GID_LEN), 932 size=binary_part(Bin, ?V7_SIZE, ?V7_SIZE_LEN), 933 mtime=binary_part(Bin, ?V7_MTIME, ?V7_MTIME_LEN), 934 checksum=binary_part(Bin, ?V7_CHKSUM, ?V7_CHKSUM_LEN), 935 typeflag=binary:at(Bin, ?V7_TYPE), 936 linkname=binary_part(Bin, ?V7_LINKNAME, ?V7_LINKNAME_LEN) 937 }; 938to_v7(_) -> 939 {error, header_block_too_small}. 940 941to_gnu(#header_v7{}=V7, Bin) 942 when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 943 #header_gnu{ 944 header_v7=V7, 945 magic=binary_part(Bin, ?GNU_MAGIC, ?GNU_MAGIC_LEN), 946 version=binary_part(Bin, ?GNU_VERSION, ?GNU_VERSION_LEN), 947 uname=binary_part(Bin, 265, 32), 948 gname=binary_part(Bin, 297, 32), 949 devmajor=binary_part(Bin, 329, 8), 950 devminor=binary_part(Bin, 337, 8), 951 atime=binary_part(Bin, 345, 12), 952 ctime=binary_part(Bin, 357, 12), 953 sparse=to_sparse_array(binary_part(Bin, 386, 24*4+1)), 954 real_size=binary_part(Bin, 483, 12) 955 }. 956 957to_star(#header_v7{}=V7, Bin) 958 when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 959 #header_star{ 960 header_v7=V7, 961 magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN), 962 version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN), 963 uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN), 964 gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN), 965 devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN), 966 devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN), 967 prefix=binary_part(Bin, 345, 131), 968 atime=binary_part(Bin, 476, 12), 969 ctime=binary_part(Bin, 488, 12), 970 trailer=binary_part(Bin, ?STAR_TRAILER, ?STAR_TRAILER_LEN) 971 }. 972 973to_ustar(#header_v7{}=V7, Bin) 974 when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 975 #header_ustar{ 976 header_v7=V7, 977 magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN), 978 version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN), 979 uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN), 980 gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN), 981 devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN), 982 devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN), 983 prefix=binary_part(Bin, 345, 155) 984 }. 985 986to_sparse_array(Bin) when is_binary(Bin) -> 987 MaxEntries = byte_size(Bin) div 24, 988 IsExtended = 1 =:= binary:at(Bin, 24*MaxEntries), 989 Entries = parse_sparse_entries(Bin, MaxEntries-1, []), 990 #sparse_array{ 991 entries=Entries, 992 max_entries=MaxEntries, 993 is_extended=IsExtended 994 }. 995 996parse_sparse_entries(<<>>, _, Acc) -> 997 Acc; 998parse_sparse_entries(_, -1, Acc) -> 999 Acc; 1000parse_sparse_entries(Bin, N, Acc) -> 1001 case to_sparse_entry(binary_part(Bin, N*24, 24)) of 1002 nil -> 1003 parse_sparse_entries(Bin, N-1, Acc); 1004 Entry = #sparse_entry{} -> 1005 parse_sparse_entries(Bin, N-1, [Entry|Acc]) 1006 end. 1007 1008-define(EMPTY_ENTRY, <<0,0,0,0,0,0,0,0,0,0,0,0>>). 1009to_sparse_entry(Bin) when is_binary(Bin), byte_size(Bin) =:= 24 -> 1010 OffsetBin = binary_part(Bin, 0, 12), 1011 NumBytesBin = binary_part(Bin, 12, 12), 1012 case {OffsetBin, NumBytesBin} of 1013 {?EMPTY_ENTRY, ?EMPTY_ENTRY} -> 1014 nil; 1015 _ -> 1016 #sparse_entry{ 1017 offset=parse_numeric(OffsetBin), 1018 num_bytes=parse_numeric(NumBytesBin)} 1019 end. 1020 1021-spec get_format(binary()) -> {ok, pos_integer(), header_v7()} 1022 | ?FORMAT_UNKNOWN 1023 | {error, term()}. 1024get_format(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 1025 do_get_format(to_v7(Bin), Bin). 1026 1027do_get_format({error, _} = Err, _Bin) -> 1028 Err; 1029do_get_format(#header_v7{}=V7, Bin) 1030 when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 1031 Checksum = parse_octal(V7#header_v7.checksum), 1032 Chk1 = compute_checksum(Bin), 1033 Chk2 = compute_signed_checksum(Bin), 1034 if Checksum =/= Chk1 andalso Checksum =/= Chk2 -> 1035 ?FORMAT_UNKNOWN; 1036 true -> 1037 %% guess magic 1038 Ustar = to_ustar(V7, Bin), 1039 Star = to_star(V7, Bin), 1040 Magic = Ustar#header_ustar.magic, 1041 Version = Ustar#header_ustar.version, 1042 Trailer = Star#header_star.trailer, 1043 Format = if 1044 Magic =:= ?MAGIC_USTAR, Trailer =:= ?TRAILER_STAR -> 1045 ?FORMAT_STAR; 1046 Magic =:= ?MAGIC_USTAR -> 1047 ?FORMAT_USTAR; 1048 Magic =:= ?MAGIC_GNU, Version =:= ?VERSION_GNU -> 1049 ?FORMAT_GNU; 1050 true -> 1051 ?FORMAT_V7 1052 end, 1053 {ok, Format, V7} 1054 end. 1055 1056unpack_format(Format, #header_v7{}=V7, Bin, Reader) 1057 when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 1058 Mtime = parse_numeric(V7#header_v7.mtime), 1059 Header0 = #tar_header{ 1060 name=parse_string(V7#header_v7.name), 1061 mode=parse_numeric(V7#header_v7.mode), 1062 uid=parse_numeric(V7#header_v7.uid), 1063 gid=parse_numeric(V7#header_v7.gid), 1064 size=parse_numeric(V7#header_v7.size), 1065 mtime=Mtime, 1066 atime=Mtime, 1067 ctime=Mtime, 1068 typeflag=V7#header_v7.typeflag, 1069 linkname=parse_string(V7#header_v7.linkname) 1070 }, 1071 Typeflag = Header0#tar_header.typeflag, 1072 Header1 = if Format > ?FORMAT_V7 -> 1073 unpack_modern(Format, V7, Bin, Header0); 1074 true -> 1075 Name = Header0#tar_header.name, 1076 Header0#tar_header{name=safe_join_path("", Name)} 1077 end, 1078 HeaderOnly = is_header_only_type(Typeflag), 1079 Header2 = if HeaderOnly -> 1080 Header1#tar_header{size=0}; 1081 true -> 1082 Header1 1083 end, 1084 if Typeflag =:= ?TYPE_GNU_SPARSE -> 1085 Gnu = to_gnu(V7, Bin), 1086 RealSize = parse_numeric(Gnu#header_gnu.real_size), 1087 {Sparsemap, Reader2} = parse_sparse_map(Gnu, Reader), 1088 Header3 = Header2#tar_header{size=RealSize}, 1089 {Header3, new_sparse_file_reader(Reader2, Sparsemap, RealSize)}; 1090 true -> 1091 FileReader = #reg_file_reader{ 1092 handle=Reader, 1093 num_bytes=Header2#tar_header.size, 1094 size=Header2#tar_header.size, 1095 pos = 0 1096 }, 1097 {Header2, FileReader} 1098 end. 1099 1100unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0) 1101 when is_binary(Bin) -> 1102 Typeflag = Header0#tar_header.typeflag, 1103 Ustar = to_ustar(V7, Bin), 1104 H0 = Header0#tar_header{ 1105 uname=parse_string(Ustar#header_ustar.uname), 1106 gname=parse_string(Ustar#header_ustar.gname)}, 1107 H1 = if Typeflag =:= ?TYPE_CHAR 1108 orelse Typeflag =:= ?TYPE_BLOCK -> 1109 Ma = parse_numeric(Ustar#header_ustar.devmajor), 1110 Mi = parse_numeric(Ustar#header_ustar.devminor), 1111 H0#tar_header{ 1112 devmajor=Ma, 1113 devminor=Mi 1114 }; 1115 true -> 1116 H0 1117 end, 1118 {Prefix, H2} = case Format of 1119 ?FORMAT_USTAR -> 1120 {parse_string(Ustar#header_ustar.prefix), H1}; 1121 ?FORMAT_STAR -> 1122 Star = to_star(V7, Bin), 1123 Prefix0 = parse_string(Star#header_star.prefix), 1124 Atime0 = Star#header_star.atime, 1125 Atime = parse_numeric(Atime0), 1126 Ctime0 = Star#header_star.ctime, 1127 Ctime = parse_numeric(Ctime0), 1128 {Prefix0, H1#tar_header{ 1129 atime=Atime, 1130 ctime=Ctime 1131 }}; 1132 _ -> 1133 {"", H1} 1134 end, 1135 Name = H2#tar_header.name, 1136 H2#tar_header{name=safe_join_path(Prefix, Name)}. 1137 1138 1139safe_join_path([], Name) -> 1140 filename:join([Name]); 1141safe_join_path(Prefix, []) -> 1142 filename:join([Prefix]); 1143safe_join_path(Prefix, Name) -> 1144 filename:join(Prefix, Name). 1145 1146new_sparse_file_reader(Reader, Sparsemap, RealSize) -> 1147 true = validate_sparse_entries(Sparsemap, RealSize), 1148 #sparse_file_reader{ 1149 handle = Reader, 1150 num_bytes = RealSize, 1151 pos = 0, 1152 size = RealSize, 1153 sparse_map = Sparsemap}. 1154 1155validate_sparse_entries(Entries, RealSize) -> 1156 validate_sparse_entries(Entries, RealSize, 0, 0). 1157validate_sparse_entries([], _RealSize, _I, _LastOffset) -> 1158 true; 1159validate_sparse_entries([#sparse_entry{}=Entry|Rest], RealSize, I, LastOffset) -> 1160 Offset = Entry#sparse_entry.offset, 1161 NumBytes = Entry#sparse_entry.num_bytes, 1162 if 1163 Offset > ?MAX_INT64-NumBytes -> 1164 throw({error, {invalid_sparse_map_entry, offset_too_large}}); 1165 Offset+NumBytes > RealSize -> 1166 throw({error, {invalid_sparse_map_entry, offset_too_large}}); 1167 I > 0 andalso LastOffset > Offset -> 1168 throw({error, {invalid_sparse_map_entry, overlapping_offsets}}); 1169 true -> 1170 ok 1171 end, 1172 validate_sparse_entries(Rest, RealSize, I+1, Offset+NumBytes). 1173 1174 1175-spec parse_sparse_map(header_gnu(), reader_type()) -> 1176 {[sparse_entry()], reader_type()}. 1177parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) 1178 when Sparse#sparse_array.is_extended -> 1179 parse_sparse_map(Sparse, Reader, []); 1180parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) -> 1181 {Sparse#sparse_array.entries, Reader}. 1182parse_sparse_map(#sparse_array{is_extended=true,entries=Entries}, Reader, Acc) -> 1183 case read_block(Reader) of 1184 eof -> 1185 throw({error, eof}); 1186 {ok, Block, Reader2} -> 1187 Sparse2 = to_sparse_array(Block), 1188 parse_sparse_map(Sparse2, Reader2, Entries++Acc) 1189 end; 1190parse_sparse_map(#sparse_array{entries=Entries}, Reader, Acc) -> 1191 Sorted = lists:sort(fun (#sparse_entry{offset=A},#sparse_entry{offset=B}) -> 1192 A =< B 1193 end, Entries++Acc), 1194 {Sorted, Reader}. 1195 1196%% Defined by taking the sum of the unsigned byte values of the 1197%% entire header record, treating the checksum bytes to as ASCII spaces 1198compute_checksum(<<H1:?V7_CHKSUM/binary, 1199 H2:?V7_CHKSUM_LEN/binary, 1200 Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary, 1201 _/binary>>) -> 1202 C0 = checksum(H1) + (byte_size(H2) * $\s), 1203 C1 = checksum(Rest), 1204 C0 + C1. 1205 1206compute_signed_checksum(<<H1:?V7_CHKSUM/binary, 1207 H2:?V7_CHKSUM_LEN/binary, 1208 Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary, 1209 _/binary>>) -> 1210 C0 = signed_checksum(H1) + (byte_size(H2) * $\s), 1211 C1 = signed_checksum(Rest), 1212 C0 + C1. 1213 1214%% Returns the checksum of a binary. 1215checksum(Bin) -> checksum(Bin, 0). 1216checksum(<<A/unsigned,Rest/binary>>, Sum) -> 1217 checksum(Rest, Sum+A); 1218checksum(<<>>, Sum) -> Sum. 1219 1220signed_checksum(Bin) -> signed_checksum(Bin, 0). 1221signed_checksum(<<A/signed,Rest/binary>>, Sum) -> 1222 signed_checksum(Rest, Sum+A); 1223signed_checksum(<<>>, Sum) -> Sum. 1224 1225-spec parse_numeric(binary()) -> non_neg_integer(). 1226parse_numeric(<<>>) -> 1227 0; 1228parse_numeric(<<First, _/binary>> = Bin) -> 1229 %% check for base-256 format first 1230 %% if the bit is set, then all following bits constitute a two's 1231 %% complement encoded number in big-endian byte order 1232 if 1233 First band 16#80 =/= 0 -> 1234 %% Handling negative numbers relies on the following identity: 1235 %% -a-1 == ^a 1236 %% If the number is negative, we use an inversion mask to invert 1237 %% the data bytes and treat the value as an unsigned number 1238 Inv = if First band 16#40 =/= 0 -> 16#00; true -> 16#FF end, 1239 Bytes = binary:bin_to_list(Bin), 1240 Reducer = fun (C, {I, X}) -> 1241 C1 = C bxor Inv, 1242 C2 = if I =:= 0 -> C1 band 16#7F; true -> C1 end, 1243 if (X bsr 56) > 0 -> 1244 throw({error,integer_overflow}); 1245 true -> 1246 {I+1, (X bsl 8) bor C2} 1247 end 1248 end, 1249 {_, N} = lists:foldl(Reducer, {0,0}, Bytes), 1250 if (N bsr 63) > 0 -> 1251 throw({error, integer_overflow}); 1252 true -> 1253 if Inv =:= 16#FF -> 1254 -1 bxor N; 1255 true -> 1256 N 1257 end 1258 end; 1259 true -> 1260 %% normal case is an octal number 1261 parse_octal(Bin) 1262 end. 1263 1264parse_octal(Bin) when is_binary(Bin) -> 1265 %% skip leading/trailing zero bytes and spaces 1266 do_parse_octal(Bin, <<>>). 1267do_parse_octal(<<>>, <<>>) -> 1268 0; 1269do_parse_octal(<<>>, Acc) -> 1270 case io_lib:fread("~8u", binary:bin_to_list(Acc)) of 1271 {error, _} -> throw({error, invalid_tar_checksum}); 1272 {ok, [Octal], []} -> Octal; 1273 {ok, _, _} -> throw({error, invalid_tar_checksum}) 1274 end; 1275do_parse_octal(<<$\s,Rest/binary>>, Acc) -> 1276 do_parse_octal(Rest, Acc); 1277do_parse_octal(<<0, Rest/binary>>, Acc) -> 1278 do_parse_octal(Rest, Acc); 1279do_parse_octal(<<C, Rest/binary>>, Acc) -> 1280 do_parse_octal(Rest, <<Acc/binary, C>>). 1281 1282parse_string(Bin) when is_binary(Bin) -> 1283 do_parse_string(Bin, <<>>). 1284do_parse_string(<<>>, Acc) -> 1285 case unicode:characters_to_list(Acc) of 1286 Str when is_list(Str) -> 1287 Str; 1288 {incomplete, _Str, _Rest} -> 1289 binary:bin_to_list(Acc); 1290 {error, _Str, _Rest} -> 1291 throw({error, {bad_header, invalid_string}}) 1292 end; 1293do_parse_string(<<0, _/binary>>, Acc) -> 1294 do_parse_string(<<>>, Acc); 1295do_parse_string(<<C, Rest/binary>>, Acc) -> 1296 do_parse_string(Rest, <<Acc/binary, C>>). 1297 1298convert_header(Bin, #reader{pos=Pos}=Reader) 1299 when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 -> 1300 case get_format(Bin) of 1301 ?FORMAT_UNKNOWN -> 1302 throw({error, bad_header}); 1303 {ok, Format, V7} -> 1304 unpack_format(Format, V7, Bin, Reader); 1305 {error, Reason} -> 1306 throw({error, {bad_header, Reason}}) 1307 end; 1308convert_header(Bin, #reader{pos=Pos}) when byte_size(Bin) =:= ?BLOCK_SIZE -> 1309 throw({error, misaligned_read, Pos}); 1310convert_header(Bin, _Reader) when byte_size(Bin) =:= 0 -> 1311 eof; 1312convert_header(_Bin, _Reader) -> 1313 throw({error, eof}). 1314 1315%% Creates a partially-populated header record based 1316%% on the provided file_info record. If the file is 1317%% a symlink, then `link` is used as the link target. 1318%% If the file is a directory, a slash is appended to the name. 1319fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) -> 1320 BaseHeader = #tar_header{name=Name, 1321 mtime=0, 1322 atime=0, 1323 ctime=0, 1324 mode=Fi#file_info.mode, 1325 typeflag=?TYPE_REGULAR}, 1326 do_fileinfo_to_header(BaseHeader, Fi, Link). 1327 1328do_fileinfo_to_header(Header, #file_info{size=Size,type=regular}, _Link) -> 1329 Header#tar_header{size=Size,typeflag=?TYPE_REGULAR}; 1330do_fileinfo_to_header(#tar_header{name=Name}=Header, 1331 #file_info{type=directory}, _Link) -> 1332 Header#tar_header{name=Name++"/",typeflag=?TYPE_DIR}; 1333do_fileinfo_to_header(Header, #file_info{type=symlink}, Link) -> 1334 Header#tar_header{typeflag=?TYPE_SYMLINK,linkname=Link}; 1335do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link) 1336 when (Mode band ?S_IFMT) =:= ?S_IFCHR -> 1337 Header#tar_header{typeflag=?TYPE_CHAR, 1338 devmajor=Fi#file_info.major_device, 1339 devminor=Fi#file_info.minor_device}; 1340do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link) 1341 when (Mode band ?S_IFMT) =:= ?S_IFBLK -> 1342 Header#tar_header{typeflag=?TYPE_BLOCK, 1343 devmajor=Fi#file_info.major_device, 1344 devminor=Fi#file_info.minor_device}; 1345do_fileinfo_to_header(Header, #file_info{type=other,mode=Mode}, _Link) 1346 when (Mode band ?S_IFMT) =:= ?S_FIFO -> 1347 Header#tar_header{typeflag=?TYPE_FIFO}; 1348do_fileinfo_to_header(Header, Fi, _Link) -> 1349 {error, {invalid_file_type, Header#tar_header.name, Fi}}. 1350 1351is_ascii(Str) when is_list(Str) -> 1352 not lists:any(fun (Char) -> Char >= 16#80 end, Str); 1353is_ascii(Bin) when is_binary(Bin) -> 1354 is_ascii1(Bin). 1355 1356is_ascii1(<<>>) -> 1357 true; 1358is_ascii1(<<C,_Rest/binary>>) when C >= 16#80 -> 1359 false; 1360is_ascii1(<<_, Rest/binary>>) -> 1361 is_ascii1(Rest). 1362 1363to_ascii(Str) when is_list(Str) -> 1364 case is_ascii(Str) of 1365 true -> 1366 unicode:characters_to_binary(Str); 1367 false -> 1368 Chars = lists:filter(fun (Char) -> Char < 16#80 end, Str), 1369 unicode:characters_to_binary(Chars) 1370 end; 1371to_ascii(Bin) when is_binary(Bin) -> 1372 to_ascii(Bin, <<>>). 1373to_ascii(<<>>, Acc) -> 1374 Acc; 1375to_ascii(<<C, Rest/binary>>, Acc) when C < 16#80 -> 1376 to_ascii(Rest, <<Acc/binary,C>>); 1377to_ascii(<<_, Rest/binary>>, Acc) -> 1378 to_ascii(Rest, Acc). 1379 1380is_header_only_type(?TYPE_SYMLINK) -> true; 1381is_header_only_type(?TYPE_LINK) -> true; 1382is_header_only_type(?TYPE_DIR) -> true; 1383is_header_only_type(_) -> false. 1384 1385foldl_read(#reader{access=read}=Reader, Fun, Accu, #read_opts{}=Opts) 1386 when is_function(Fun,4) -> 1387 case foldl_read0(Reader, Fun, Accu, Opts) of 1388 {ok, Result, _Reader2} -> 1389 Result; 1390 {error, _} = Err -> 1391 Err 1392 end; 1393foldl_read(#reader{access=Access}, _Fun, _Accu, _Opts) -> 1394 {error, {read_mode_expected, Access}}; 1395foldl_read(TarName, Fun, Accu, #read_opts{}=Opts) 1396 when is_function(Fun,4) -> 1397 try open(TarName, [read|Opts#read_opts.open_mode]) of 1398 {ok, #reader{access=read}=Reader} -> 1399 try 1400 foldl_read(Reader, Fun, Accu, Opts) 1401 after 1402 _ = close(Reader) 1403 end; 1404 {error, _} = Err -> 1405 Err 1406 catch 1407 throw:Err -> 1408 Err 1409 end. 1410 1411foldl_read0(Reader, Fun, Accu, Opts) -> 1412 try foldl_read1(Fun, Accu, Reader, Opts, #{}) of 1413 {ok,_,_} = Ok -> 1414 Ok 1415 catch 1416 throw:{error, {Reason, Format, Args}} -> 1417 read_verbose(Opts, Format, Args), 1418 {error, Reason}; 1419 throw:Err -> 1420 Err 1421 end. 1422 1423foldl_read1(Fun, Accu0, Reader0, Opts, ExtraHeaders) -> 1424 {ok, Reader1} = skip_unread(Reader0), 1425 case get_header(Reader1) of 1426 eof -> 1427 Fun(eof, Reader1, Opts, Accu0); 1428 {Header, Reader2} -> 1429 case Header#tar_header.typeflag of 1430 ?TYPE_X_HEADER -> 1431 {ExtraHeaders2, Reader3} = parse_pax(Reader2), 1432 ExtraHeaders3 = maps:merge(ExtraHeaders, ExtraHeaders2), 1433 foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders3); 1434 ?TYPE_GNU_LONGNAME -> 1435 {RealName, Reader3} = get_real_name(Reader2), 1436 ExtraHeaders2 = maps:put(?PAX_PATH, 1437 parse_string(RealName), ExtraHeaders), 1438 foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2); 1439 ?TYPE_GNU_LONGLINK -> 1440 {RealName, Reader3} = get_real_name(Reader2), 1441 ExtraHeaders2 = maps:put(?PAX_LINKPATH, 1442 parse_string(RealName), ExtraHeaders), 1443 foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2); 1444 _ -> 1445 Header1 = merge_pax(Header, ExtraHeaders), 1446 {ok, NewAccu, Reader3} = Fun(Header1, Reader2, Opts, Accu0), 1447 foldl_read1(Fun, NewAccu, Reader3, Opts, #{}) 1448 end 1449 end. 1450 1451%% Applies all known PAX attributes to the current tar header 1452-spec merge_pax(tar_header(), #{binary() => binary()}) -> tar_header(). 1453merge_pax(Header, ExtraHeaders) when is_map(ExtraHeaders) -> 1454 do_merge_pax(Header, maps:to_list(ExtraHeaders)). 1455 1456do_merge_pax(Header, []) -> 1457 Header; 1458do_merge_pax(Header, [{?PAX_PATH, Path}|Rest]) -> 1459 do_merge_pax(Header#tar_header{name=unicode:characters_to_list(Path)}, Rest); 1460do_merge_pax(Header, [{?PAX_LINKPATH, LinkPath}|Rest]) -> 1461 do_merge_pax(Header#tar_header{linkname=unicode:characters_to_list(LinkPath)}, Rest); 1462do_merge_pax(Header, [{?PAX_GNAME, Gname}|Rest]) -> 1463 do_merge_pax(Header#tar_header{gname=unicode:characters_to_list(Gname)}, Rest); 1464do_merge_pax(Header, [{?PAX_UNAME, Uname}|Rest]) -> 1465 do_merge_pax(Header#tar_header{uname=unicode:characters_to_list(Uname)}, Rest); 1466do_merge_pax(Header, [{?PAX_UID, Uid}|Rest]) -> 1467 Uid2 = binary_to_integer(Uid), 1468 do_merge_pax(Header#tar_header{uid=Uid2}, Rest); 1469do_merge_pax(Header, [{?PAX_GID, Gid}|Rest]) -> 1470 Gid2 = binary_to_integer(Gid), 1471 do_merge_pax(Header#tar_header{gid=Gid2}, Rest); 1472do_merge_pax(Header, [{?PAX_ATIME, Atime}|Rest]) -> 1473 Atime2 = parse_pax_time(Atime), 1474 do_merge_pax(Header#tar_header{atime=Atime2}, Rest); 1475do_merge_pax(Header, [{?PAX_MTIME, Mtime}|Rest]) -> 1476 Mtime2 = parse_pax_time(Mtime), 1477 do_merge_pax(Header#tar_header{mtime=Mtime2}, Rest); 1478do_merge_pax(Header, [{?PAX_CTIME, Ctime}|Rest]) -> 1479 Ctime2 = parse_pax_time(Ctime), 1480 do_merge_pax(Header#tar_header{ctime=Ctime2}, Rest); 1481do_merge_pax(Header, [{?PAX_SIZE, Size}|Rest]) -> 1482 Size2 = binary_to_integer(Size), 1483 do_merge_pax(Header#tar_header{size=Size2}, Rest); 1484do_merge_pax(Header, [{<<?PAX_XATTR_STR, _Key/binary>>, _Value}|Rest]) -> 1485 do_merge_pax(Header, Rest); 1486do_merge_pax(Header, [_Ignore|Rest]) -> 1487 do_merge_pax(Header, Rest). 1488 1489%% Returns the time since UNIX epoch as a datetime 1490-spec parse_pax_time(binary()) -> tar_time(). 1491parse_pax_time(Bin) when is_binary(Bin) -> 1492 TotalNano = case binary:split(Bin, [<<$.>>]) of 1493 [SecondsStr, NanoStr0] -> 1494 Seconds = binary_to_integer(SecondsStr), 1495 if byte_size(NanoStr0) < ?MAX_NANO_INT_SIZE -> 1496 %% right pad 1497 PaddingN = ?MAX_NANO_INT_SIZE-byte_size(NanoStr0), 1498 Padding = binary:copy(<<$0>>, PaddingN), 1499 NanoStr1 = <<NanoStr0/binary,Padding/binary>>, 1500 Nano = binary_to_integer(NanoStr1), 1501 (Seconds*?BILLION)+Nano; 1502 byte_size(NanoStr0) > ?MAX_NANO_INT_SIZE -> 1503 %% right truncate 1504 NanoStr1 = binary_part(NanoStr0, 0, ?MAX_NANO_INT_SIZE), 1505 Nano = binary_to_integer(NanoStr1), 1506 (Seconds*?BILLION)+Nano; 1507 true -> 1508 (Seconds*?BILLION)+binary_to_integer(NanoStr0) 1509 end; 1510 [SecondsStr] -> 1511 binary_to_integer(SecondsStr)*?BILLION 1512 end, 1513 %% truncate to microseconds 1514 Micro = TotalNano div 1000, 1515 Mega = Micro div 1000000000000, 1516 Secs = Micro div 1000000 - (Mega*1000000), 1517 Secs. 1518 1519%% Given a regular file reader, reads the whole file and 1520%% parses all extended attributes it contains. 1521parse_pax(#reg_file_reader{handle=Handle,num_bytes=0}) -> 1522 {#{}, Handle}; 1523parse_pax(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) -> 1524 case do_read(Handle0, NumBytes) of 1525 {ok, Bytes, Handle1} -> 1526 do_parse_pax(Handle1, Bytes, #{}); 1527 {error, _} = Err -> 1528 throw(Err) 1529 end. 1530 1531do_parse_pax(Reader, <<>>, Headers) -> 1532 {Headers, Reader}; 1533do_parse_pax(Reader, Bin, Headers) -> 1534 {Key, Value, Residual} = parse_pax_record(Bin), 1535 NewHeaders = maps:put(Key, Value, Headers), 1536 do_parse_pax(Reader, Residual, NewHeaders). 1537 1538%% Parse an extended attribute 1539parse_pax_record(Bin) when is_binary(Bin) -> 1540 case binary:split(Bin, [<<$\n>>]) of 1541 [Record, Residual] -> 1542 case [X || X <- binary:split(Record, [<<$\s>>], [global]), X =/= <<>>] of 1543 [_Len, Record1] -> 1544 case [X || X <- binary:split(Record1, [<<$=>>], [global]), X =/= <<>>] of 1545 [AttrName, AttrValue] -> 1546 {AttrName, AttrValue, Residual}; 1547 _Other -> 1548 throw({error, malformed_pax_record}) 1549 end; 1550 _Other -> 1551 throw({error, malformed_pax_record}) 1552 end; 1553 _Other -> 1554 throw({error, malformed_pax_record}) 1555 end. 1556 1557get_real_name(#reg_file_reader{handle=Handle,num_bytes=0}) -> 1558 {"", Handle}; 1559get_real_name(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) -> 1560 case do_read(Handle0, NumBytes) of 1561 {ok, RealName, Handle1} -> 1562 {RealName, Handle1}; 1563 {error, _} = Err -> 1564 throw(Err) 1565 end; 1566get_real_name(#sparse_file_reader{num_bytes=NumBytes}=Reader0) -> 1567 case do_read(Reader0, NumBytes) of 1568 {ok, RealName, Reader1} -> 1569 {RealName, Reader1}; 1570 {error, _} = Err -> 1571 throw(Err) 1572 end. 1573 1574%% Skip the remaining bytes for the current file entry 1575skip_file(#reg_file_reader{handle=Handle0,pos=Pos,size=Size}=Reader) -> 1576 Padding = skip_padding(Size), 1577 AbsPos = Handle0#reader.pos + (Size-Pos) + Padding, 1578 case do_position(Handle0, AbsPos) of 1579 {ok, _, Handle1} -> 1580 Reader#reg_file_reader{handle=Handle1,num_bytes=0,pos=Size}; 1581 Err -> 1582 throw(Err) 1583 end; 1584skip_file(#sparse_file_reader{pos=Pos,size=Size}=Reader) -> 1585 case do_read(Reader, Size-Pos) of 1586 {ok, _, Reader2} -> 1587 Reader2; 1588 Err -> 1589 throw(Err) 1590 end. 1591 1592skip_padding(0) -> 1593 0; 1594skip_padding(Size) when (Size rem ?BLOCK_SIZE) =:= 0 -> 1595 0; 1596skip_padding(Size) when Size =< ?BLOCK_SIZE -> 1597 ?BLOCK_SIZE - Size; 1598skip_padding(Size) -> 1599 ?BLOCK_SIZE - (Size rem ?BLOCK_SIZE). 1600 1601skip_unread(#reader{pos=Pos}=Reader0) when (Pos rem ?BLOCK_SIZE) > 0 -> 1602 Padding = skip_padding(Pos + ?BLOCK_SIZE), 1603 AbsPos = Pos + Padding, 1604 case do_position(Reader0, AbsPos) of 1605 {ok, _, Reader1} -> 1606 {ok, Reader1}; 1607 Err -> 1608 throw(Err) 1609 end; 1610skip_unread(#reader{}=Reader) -> 1611 {ok, Reader}; 1612skip_unread(#reg_file_reader{handle=Handle,num_bytes=0}) -> 1613 skip_unread(Handle); 1614skip_unread(#reg_file_reader{}=Reader) -> 1615 #reg_file_reader{handle=Handle} = skip_file(Reader), 1616 {ok, Handle}; 1617skip_unread(#sparse_file_reader{handle=Handle,num_bytes=0}) -> 1618 skip_unread(Handle); 1619skip_unread(#sparse_file_reader{}=Reader) -> 1620 #sparse_file_reader{handle=Handle} = skip_file(Reader), 1621 {ok, Handle}. 1622 1623write_extracted_element(#tar_header{name=Name,typeflag=Type}, 1624 Bin, 1625 #read_opts{output=memory}=Opts) -> 1626 case typeflag(Type) of 1627 regular -> 1628 read_verbose(Opts, "x ~ts~n", [Name]), 1629 {ok, {Name, Bin}}; 1630 _ -> 1631 ok 1632 end; 1633write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) -> 1634 Name1 = make_safe_path(Name0, Opts), 1635 Created = 1636 case typeflag(Header#tar_header.typeflag) of 1637 regular -> 1638 create_regular(Name1, Name0, Bin, Opts); 1639 directory -> 1640 read_verbose(Opts, "x ~ts~n", [Name0]), 1641 create_extracted_dir(Name1, Opts); 1642 symlink -> 1643 read_verbose(Opts, "x ~ts~n", [Name0]), 1644 create_symlink(Name1, Header#tar_header.linkname, Opts); 1645 Device when Device =:= char orelse Device =:= block -> 1646 %% char/block devices will be created as empty files 1647 %% and then have their major/minor device set later 1648 create_regular(Name1, Name0, <<>>, Opts); 1649 fifo -> 1650 %% fifo devices will be created as empty files 1651 create_regular(Name1, Name0, <<>>, Opts); 1652 Other -> % Ignore. 1653 read_verbose(Opts, "x ~ts - unsupported type ~p~n", 1654 [Name0, Other]), 1655 not_written 1656 end, 1657 case Created of 1658 ok -> set_extracted_file_info(Name1, Header); 1659 not_written -> ok 1660 end. 1661 1662make_safe_path([$/|Path], Opts) -> 1663 make_safe_path(Path, Opts); 1664make_safe_path(Path, #read_opts{cwd=Cwd}) -> 1665 case safe_relative_path_links(Path, Cwd) of 1666 unsafe -> 1667 throw({error,{Path,unsafe_path}}); 1668 SafePath -> 1669 filename:absname(SafePath, Cwd) 1670 end. 1671 1672safe_relative_path_links(Path, Cwd) -> 1673 case filename:pathtype(Path) of 1674 relative -> safe_relative_path_links(filename:split(Path), Cwd, [], ""); 1675 _ -> unsafe 1676 end. 1677 1678safe_relative_path_links([], _Cwd, _PrevLinks, Acc) -> 1679 Acc; 1680 1681safe_relative_path_links([Segment | Segments], Cwd, PrevLinks, Acc) -> 1682 AccSegment = join(Acc, Segment), 1683 1684 case mix_hex_filename:safe_relative_path(AccSegment) of 1685 unsafe -> 1686 unsafe; 1687 1688 SafeAccSegment -> 1689 case file:read_link(join(Cwd, SafeAccSegment)) of 1690 {ok, LinkPath} -> 1691 case lists:member(LinkPath, PrevLinks) of 1692 true -> 1693 unsafe; 1694 false -> 1695 case safe_relative_path_links(filename:split(LinkPath), Cwd, [LinkPath | PrevLinks], Acc) of 1696 unsafe -> unsafe; 1697 NewAcc -> safe_relative_path_links(Segments, Cwd, [], NewAcc) 1698 end 1699 end; 1700 1701 {error, _} -> 1702 safe_relative_path_links(Segments, Cwd, PrevLinks, SafeAccSegment) 1703 end 1704 end. 1705 1706join([], Path) -> Path; 1707join(Left, Right) -> filename:join(Left, Right). 1708 1709create_regular(Name, NameInArchive, Bin, Opts) -> 1710 case write_extracted_file(Name, Bin, Opts) of 1711 not_written -> 1712 read_verbose(Opts, "x ~ts - exists, not created~n", [NameInArchive]), 1713 not_written; 1714 Ok -> 1715 read_verbose(Opts, "x ~ts~n", [NameInArchive]), 1716 Ok 1717 end. 1718 1719create_extracted_dir(Name, _Opts) -> 1720 case file:make_dir(Name) of 1721 ok -> ok; 1722 {error,enotsup} -> not_written; 1723 {error,eexist} -> not_written; 1724 {error,enoent} -> make_dirs(Name, dir); 1725 {error,Reason} -> throw({error, Reason}) 1726 end. 1727 1728create_symlink(Name, Linkname, Opts) -> 1729 case file:make_symlink(Linkname, Name) of 1730 ok -> ok; 1731 {error,enoent} -> 1732 ok = make_dirs(Name, file), 1733 create_symlink(Name, Linkname, Opts); 1734 {error,eexist} -> not_written; 1735 {error,enotsup} -> 1736 read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]), 1737 not_written; 1738 {error,Reason} -> throw({error, Reason}) 1739 end. 1740 1741write_extracted_file(Name, Bin, Opts) -> 1742 Write = 1743 case Opts#read_opts.keep_old_files of 1744 true -> 1745 case file:read_file_info(Name) of 1746 {ok, _} -> false; 1747 _ -> true 1748 end; 1749 false -> true 1750 end, 1751 case Write of 1752 true -> write_file(Name, Bin); 1753 false -> not_written 1754 end. 1755 1756write_file(Name, Bin) -> 1757 case file:write_file(Name, Bin) of 1758 ok -> ok; 1759 {error,enoent} -> 1760 case make_dirs(Name, file) of 1761 ok -> 1762 write_file(Name, Bin); 1763 {error,Reason} -> 1764 throw({error, Reason}) 1765 end; 1766 {error,Reason} -> 1767 throw({error, Reason}) 1768 end. 1769 1770set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_SYMLINK}) -> ok; 1771set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_LINK}) -> ok; 1772set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_CHAR}=Header) -> 1773 set_device_info(Name, Header); 1774set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_BLOCK}=Header) -> 1775 set_device_info(Name, Header); 1776set_extracted_file_info(Name, #tar_header{mtime=Mtime,mode=Mode}) -> 1777 Info = #file_info{mode=Mode, mtime=Mtime}, 1778 file:write_file_info(Name, Info, [{time, posix}]). 1779 1780set_device_info(Name, #tar_header{}=Header) -> 1781 Mtime = Header#tar_header.mtime, 1782 Mode = Header#tar_header.mode, 1783 Devmajor = Header#tar_header.devmajor, 1784 Devminor = Header#tar_header.devminor, 1785 Info = #file_info{ 1786 mode=Mode, 1787 mtime=Mtime, 1788 major_device=Devmajor, 1789 minor_device=Devminor 1790 }, 1791 file:write_file_info(Name, Info). 1792 1793%% Makes all directories leading up to the file. 1794 1795make_dirs(Name, file) -> 1796 filelib:ensure_dir(Name); 1797make_dirs(Name, dir) -> 1798 filelib:ensure_dir(filename:join(Name,"*")). 1799 1800%% Prints the message on if the verbose option is given (for reading). 1801read_verbose(#read_opts{verbose=true}, Format, Args) -> 1802 io:format(Format, Args); 1803read_verbose(_, _, _) -> 1804 ok. 1805 1806%% Prints the message on if the verbose option is given. 1807add_verbose(#add_opts{verbose=true}, Format, Args) -> 1808 io:format(Format, Args); 1809add_verbose(_, _, _) -> 1810 ok. 1811 1812%%%%%%%%%%%%%%%%%% 1813%% I/O primitives 1814%%%%%%%%%%%%%%%%%% 1815 1816do_write(#reader{handle=Handle,func=Fun}=Reader0, Data) 1817 when is_function(Fun,2) -> 1818 case Fun(write,{Handle,Data}) of 1819 ok -> 1820 {ok, Pos, Reader1} = do_position(Reader0, {cur,0}), 1821 {ok, Reader1#reader{pos=Pos}}; 1822 {error, _} = Err -> 1823 Err 1824 end. 1825 1826do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=0}=Opts) 1827 when is_function(Fun, 2) -> 1828 do_copy(Reader, Source, Opts#add_opts{chunk_size=65536}); 1829do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=ChunkSize}) 1830 when is_function(Fun, 2) -> 1831 case file:open(Source, [read, binary]) of 1832 {ok, SourceFd} -> 1833 case copy_chunked(Reader, SourceFd, ChunkSize, 0) of 1834 {ok, _Copied, _Reader2} = Ok-> 1835 _ = file:close(SourceFd), 1836 Ok; 1837 Err -> 1838 _ = file:close(SourceFd), 1839 throw(Err) 1840 end; 1841 Err -> 1842 throw(Err) 1843 end. 1844 1845copy_chunked(#reader{}=Reader, Source, ChunkSize, Copied) -> 1846 case file:read(Source, ChunkSize) of 1847 {ok, Bin} -> 1848 {ok, Reader2} = do_write(Reader, Bin), 1849 copy_chunked(Reader2, Source, ChunkSize, Copied+byte_size(Bin)); 1850 eof -> 1851 {ok, Copied, Reader}; 1852 Other -> 1853 Other 1854 end. 1855 1856 1857do_position(#reader{handle=Handle,func=Fun}=Reader, Pos) 1858 when is_function(Fun,2)-> 1859 case Fun(position, {Handle,Pos}) of 1860 {ok, NewPos} -> 1861 %% since Pos may not always be an absolute seek, 1862 %% make sure we update the reader with the new absolute position 1863 {ok, AbsPos} = Fun(position, {Handle, {cur, 0}}), 1864 {ok, NewPos, Reader#reader{pos=AbsPos}}; 1865 Other -> 1866 Other 1867 end. 1868 1869do_read(#reg_file_reader{handle=Handle,pos=Pos,size=Size}=Reader, Len) -> 1870 NumBytes = Size - Pos, 1871 ActualLen = if NumBytes - Len < 0 -> NumBytes; true -> Len end, 1872 case do_read(Handle, ActualLen) of 1873 {ok, Bin, Handle2} -> 1874 NewPos = Pos + ActualLen, 1875 NumBytes2 = Size - NewPos, 1876 Reader1 = Reader#reg_file_reader{ 1877 handle=Handle2, 1878 pos=NewPos, 1879 num_bytes=NumBytes2}, 1880 {ok, Bin, Reader1}; 1881 Other -> 1882 Other 1883 end; 1884do_read(#sparse_file_reader{}=Reader, Len) -> 1885 do_sparse_read(Reader, Len); 1886do_read(#reader{pos=Pos,handle=Handle,func=Fun}=Reader, Len) 1887 when is_function(Fun,2)-> 1888 %% Always convert to binary internally 1889 case Fun(read2,{Handle,Len}) of 1890 {ok, List} when is_list(List) -> 1891 Bin = list_to_binary(List), 1892 NewPos = Pos+byte_size(Bin), 1893 {ok, Bin, Reader#reader{pos=NewPos}}; 1894 {ok, Bin} when is_binary(Bin) -> 1895 NewPos = Pos+byte_size(Bin), 1896 {ok, Bin, Reader#reader{pos=NewPos}}; 1897 Other -> 1898 Other 1899 end. 1900 1901 1902do_sparse_read(Reader, Len) -> 1903 do_sparse_read(Reader, Len, <<>>). 1904 1905do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{num_bytes=0}|Entries] 1906 }=Reader0, Len, Acc) -> 1907 %% skip all empty fragments 1908 Reader1 = Reader0#sparse_file_reader{sparse_map=Entries}, 1909 do_sparse_read(Reader1, Len, Acc); 1910do_sparse_read(#sparse_file_reader{sparse_map=[], 1911 pos=Pos,size=Size}=Reader0, Len, Acc) 1912 when Pos < Size -> 1913 %% if there are no more fragments, it is possible that there is one last sparse hole 1914 %% this behaviour matches the BSD tar utility 1915 %% however, GNU tar stops returning data even if we haven't reached the end 1916 {ok, Bin, Reader1} = read_sparse_hole(Reader0, Size, Len), 1917 do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>); 1918do_sparse_read(#sparse_file_reader{sparse_map=[]}=Reader, _Len, Acc) -> 1919 {ok, Acc, Reader}; 1920do_sparse_read(#sparse_file_reader{}=Reader, 0, Acc) -> 1921 {ok, Acc, Reader}; 1922do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{offset=Offset}|_], 1923 pos=Pos}=Reader0, Len, Acc) 1924 when Pos < Offset -> 1925 {ok, Bin, Reader1} = read_sparse_hole(Reader0, Offset, Offset-Pos), 1926 do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>); 1927do_sparse_read(#sparse_file_reader{sparse_map=[Entry|Entries], 1928 pos=Pos}=Reader0, Len, Acc) -> 1929 %% we're in a data fragment, so read from it 1930 %% end offset of fragment 1931 EndPos = Entry#sparse_entry.offset + Entry#sparse_entry.num_bytes, 1932 %% bytes left in fragment 1933 NumBytes = EndPos - Pos, 1934 ActualLen = if Len > NumBytes -> NumBytes; true -> Len end, 1935 case do_read(Reader0#sparse_file_reader.handle, ActualLen) of 1936 {ok, Bin, Handle} -> 1937 BytesRead = byte_size(Bin), 1938 ActualEndPos = Pos+BytesRead, 1939 Reader1 = if ActualEndPos =:= EndPos -> 1940 Reader0#sparse_file_reader{sparse_map=Entries}; 1941 true -> 1942 Reader0 1943 end, 1944 Size = Reader1#sparse_file_reader.size, 1945 NumBytes2 = Size - ActualEndPos, 1946 Reader2 = Reader1#sparse_file_reader{ 1947 handle=Handle, 1948 pos=ActualEndPos, 1949 num_bytes=NumBytes2}, 1950 do_sparse_read(Reader2, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>); 1951 Other -> 1952 Other 1953 end. 1954 1955%% Reads a sparse hole ending at Offset 1956read_sparse_hole(#sparse_file_reader{pos=Pos}=Reader, Offset, Len) -> 1957 N = Offset - Pos, 1958 N2 = if N > Len -> 1959 Len; 1960 true -> 1961 N 1962 end, 1963 Bin = <<0:N2/unit:8>>, 1964 NumBytes = Reader#sparse_file_reader.size - (Pos+N2), 1965 {ok, Bin, Reader#sparse_file_reader{ 1966 num_bytes=NumBytes, 1967 pos=Pos+N2}}. 1968 1969-spec do_close(reader()) -> ok | {error, term()}. 1970do_close(#reader{handle=Handle,func=Fun}) when is_function(Fun,2) -> 1971 Fun(close,Handle). 1972 1973%%%%%%%%%%%%%%%%%% 1974%% Option parsing 1975%%%%%%%%%%%%%%%%%% 1976 1977extract_opts(List) -> 1978 extract_opts(List, default_options()). 1979 1980table_opts(List) -> 1981 read_opts(List, default_options()). 1982 1983default_options() -> 1984 {ok, Cwd} = file:get_cwd(), 1985 #read_opts{cwd=Cwd}. 1986 1987extract_opts([keep_old_files|Rest], Opts) -> 1988 extract_opts(Rest, Opts#read_opts{keep_old_files=true}); 1989extract_opts([{cwd, Cwd}|Rest], Opts) -> 1990 extract_opts(Rest, Opts#read_opts{cwd=Cwd}); 1991extract_opts([{files, Files}|Rest], Opts) -> 1992 Set = ordsets:from_list(Files), 1993 extract_opts(Rest, Opts#read_opts{files=Set}); 1994extract_opts([memory|Rest], Opts) -> 1995 extract_opts(Rest, Opts#read_opts{output=memory}); 1996extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) -> 1997 extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]}); 1998extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> 1999 extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); 2000extract_opts([verbose|Rest], Opts) -> 2001 extract_opts(Rest, Opts#read_opts{verbose=true}); 2002extract_opts([Other|Rest], Opts) -> 2003 extract_opts(Rest, read_opts([Other], Opts)); 2004extract_opts([], Opts) -> 2005 Opts. 2006 2007read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) -> 2008 read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]}); 2009read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> 2010 read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); 2011read_opts([verbose|Rest], Opts) -> 2012 read_opts(Rest, Opts#read_opts{verbose=true}); 2013read_opts([_|Rest], Opts) -> 2014 read_opts(Rest, Opts); 2015read_opts([], Opts) -> 2016 Opts. 2017