1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 1997-2020. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20%% This module implements extraction/creation of tar archives. 21%% It supports reading most common tar formats, namely V7, STAR, 22%% USTAR, GNU, BSD/libarchive, and PAX. It produces archives in USTAR 23%% format, unless it must use PAX headers, in which case it produces PAX 24%% format. 25%% 26%% The following references where used: 27%% http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 28%% http://www.gnu.org/software/tar/manual/html_node/Standard.html 29%% http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html 30-module(erl_tar). 31 32-export([init/3, 33 create/2, create/3, 34 extract/1, extract/2, 35 table/1, table/2, t/1, tt/1, 36 open/2, close/1, 37 add/3, add/4, 38 format_error/1]). 39 40-include_lib("kernel/include/file.hrl"). 41-include_lib("erl_tar.hrl"). 42 43%% Converts the short error reason to a descriptive string. 44-spec format_error(term()) -> string(). 
%% Clause order matters: the specific reasons come first, then the generic
%% {Name, Reason} wrappers, then bare atoms (delegated to file:format_error/1),
%% and finally a catch-all that pretty-prints any other term.
format_error(invalid_tar_checksum) ->
    "Checksum failed";
format_error(bad_header) ->
    "Unrecognized tar header format";
format_error({bad_header, Reason}) ->
    lists:flatten(io_lib:format("Unrecognized tar header format: ~p", [Reason]));
format_error({invalid_header, negative_size}) ->
    "Invalid header: negative size";
format_error(invalid_sparse_header_size) ->
    "Invalid sparse header: negative size";
format_error(invalid_sparse_map_entry) ->
    "Invalid sparse map entry";
format_error({invalid_sparse_map_entry, Reason}) ->
    lists:flatten(io_lib:format("Invalid sparse map entry: ~p", [Reason]));
format_error(invalid_end_of_archive) ->
    "Invalid end of archive";
format_error(eof) ->
    "Unexpected end of file";
format_error(integer_overflow) ->
    "Failed to parse numeric: integer overflow";
format_error({misaligned_read, Pos}) ->
    lists:flatten(io_lib:format("Read a block which was misaligned: block_size=~p pos=~p",
                                [?BLOCK_SIZE, Pos]));
format_error(invalid_gnu_1_0_sparsemap) ->
    "Invalid GNU sparse map (version 1.0)";
format_error({invalid_gnu_0_1_sparsemap, Format}) ->
    lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format]));
format_error(unsafe_path) ->
    "The path points above the current working directory";
%% {Name, Reason} where Name is printable with ~ts (chardata or an atom).
format_error({Name, Reason}) when is_list(Name); is_binary(Name); is_atom(Name) ->
    lists:flatten(io_lib:format("~ts: ~ts", [Name, format_error(Reason)]));
%% {Handle, Reason} where Handle is not chardata, e.g. the {binary, Bin} or
%% {file, Fd} handles that open1/4 puts in its error tuples. ~ts would raise
%% badarg for such terms, so they are printed with ~tp instead.
format_error({Handle, Reason}) ->
    lists:flatten(io_lib:format("~tp: ~ts", [Handle, format_error(Reason)]));
format_error(Atom) when is_atom(Atom) ->
    file:format_error(Atom);
format_error(Term) ->
    lists:flatten(io_lib:format("~tp", [Term])).

%% Initializes a new reader given a custom file handle and I/O wrappers
-spec init(UserData :: user_data(), write | read, file_op()) ->
          {ok, tar_descriptor()} | {error, badarg}.
%% Fun must be an arity-2 I/O callback; anything else yields {error, badarg}.
%% The current position of the handle is queried once and cached in the reader.
init(UserData, AccessMode, Fun) when is_function(Fun, 2) ->
    Reader = #reader{handle=UserData,access=AccessMode,func=Fun},
    {ok, Pos, Reader2} = do_position(Reader, {cur, 0}),
    {ok, Reader2#reader{pos=Pos}};
init(_UserData, _AccessMode, _Fun) ->
    {error, badarg}.

%%%================================================================
%% Extracts all files from the tar file Name.
-spec extract(Open :: open_type()) -> ok | {error, term()}.
extract(Name) ->
    extract(Name, []).

%% Extracts (all) files from the tar file Name.
%% Options accepted:
%%  - cooked: Opens the tar file without mode `raw`
%%  - compressed: Uncompresses the tar file when reading
%%  - memory: Returns the tar contents as a list of tuples {Name, Bin}
%%  - keep_old_files: Extracted files will not overwrite the destination
%%  - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract
%%  - verbose: Prints verbose information about the extraction,
%%  - {cwd, AbsoluteDir}: Sets the current working directory for the extraction
-spec extract(Open :: open_type(), [extract_opt()]) ->
          {ok, [{string(), binary()}]} |
          {error, term()} |
          ok.
extract({binary, Bin}, Opts) when is_list(Opts) ->
    do_extract({binary, Bin}, Opts);
extract({file, Fd}, Opts) when is_list(Opts) ->
    do_extract({file, Fd}, Opts);
extract(#reader{}=Reader, Opts) when is_list(Opts) ->
    do_extract(Reader, Opts);
%% The name test is parenthesised so that is_list(Opts) is checked for both
%% list and binary names. With a bare `;` the option check only applied to
%% binary names, because `;` separates whole guard sequences.
extract(Name, Opts) when (is_list(Name) orelse is_binary(Name)), is_list(Opts) ->
    do_extract(Name, Opts).

%% Normalises the options and folds extract1/4 over the archive. The
%% accumulator starts as [] when the output is `memory` and as `ok` otherwise.
do_extract(Handle, Opts) when is_list(Opts) ->
    Opts2 = extract_opts(Opts),
    Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end,
    foldl_read(Handle, fun extract1/4, Acc, Opts2).
%% Fold callback used by do_extract/2. At eof the accumulator is turned into
%% the final result; for each header the member is extracted or skipped.
extract1(eof, Reader, _Opts, Acc) when is_list(Acc) ->
    %% list accumulator: entries were prepended, so restore archive order
    {ok, {ok, lists:reverse(Acc)}, Reader};
extract1(eof, Reader, _Opts, leading_slash) ->
    error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
    {ok, ok, Reader};
extract1(eof, Reader, _Opts, Acc) ->
    {ok, Acc, Reader};
extract1(#tar_header{name=Name,size=Size}=Header, Reader0, Opts, Acc0) ->
    case check_extract(Name, Opts) of
        false ->
            {ok, Acc0, skip_file(Reader0)};
        true ->
            case do_read(Reader0, Size) of
                {error, _} = Err ->
                    throw(Err);
                {ok, Bin, Reader1} ->
                    {ok, extract2(Header, Bin, Opts, Acc0), Reader1}
            end
    end.

%% Writes one extracted member and returns the updated accumulator.
%% Errors from write_extracted_element/3 are thrown.
extract2(Header, Bin, Opts, Acc) ->
    case write_extracted_element(Header, Bin, Opts) of
        {error, _} = Err ->
            throw(Err);
        {ok, NameBin} when is_list(Acc) ->
            [NameBin | Acc];
        ok ->
            extract2_acc(Header, Acc)
    end.

%% Remembers (via the accumulator) that a member name started with "/".
extract2_acc(#tar_header{name="/"++_}, _Acc) ->
    leading_slash;
extract2_acc(#tar_header{}, Acc) ->
    Acc.

%% Checks if the file Name should be extracted.
check_extract(_Name, #read_opts{files=all}) ->
    true;
check_extract(Name, #read_opts{files=Wanted}) ->
    ordsets:is_element(Name, Wanted).

%%%================================================================
%% The following table functions produce a list of information about
%% the files contained in the archive.
-type typeflag() :: regular | link | symlink |
                    char | block | directory |
                    fifo | reserved | unknown.
-type mode() :: non_neg_integer().
-type uid() :: non_neg_integer().
-type gid() :: non_neg_integer().

-type tar_entry() :: {Name :: name_in_archive(),
                      Type :: typeflag(),
                      Size :: non_neg_integer(),
                      MTime :: tar_time(),
                      Mode :: mode(),
                      Uid :: uid(),
                      Gid :: gid()}.

%% Returns a list of names of the files in the tar file Name.
-spec table(Open :: open_type()) -> {ok, [name_in_archive()]} | {error, term()}.
table(Name) ->
    table(Name, []).
%% Returns a list of names of the files in the tar file Name.
%% Options accepted: compressed, verbose, cooked.
-spec table(Open :: open_type(), [compressed | verbose | cooked]) ->
          {ok, [name_in_archive() | tar_entry()]} | {error, term()}.
table(Name, Opts) when is_list(Opts) ->
    ReadOpts = table_opts(Opts),
    foldl_read(Name, fun table1/4, [], ReadOpts).

%% Fold callback for table/2: collects one entry per header and skips the
%% member's data; at eof the collected entries are put back in archive order.
table1(eof, Reader, _Opts, Entries) ->
    {ok, {ok, lists:reverse(Entries)}, Reader};
table1(#tar_header{}=Header, Reader, #read_opts{verbose=Verbose}, Entries) ->
    Entry = table1_attrs(Header, Verbose),
    {ok, [Entry|Entries], skip_file(Reader)}.

%% Extracts attributes relevant to table1's output:
%% a full tar_entry() tuple when verbose is true, otherwise just the name.
table1_attrs(#tar_header{name=Name,
                         typeflag=Typeflag,
                         size=Size,
                         mtime=Mtime,
                         mode=Mode,
                         uid=Uid,
                         gid=Gid}, true) ->
    {Name, typeflag(Typeflag), Size, Mtime, Mode, Uid, Gid};
table1_attrs(#tar_header{name=Name}, _Verbose) ->
    Name.

%% Maps a header type flag to the atom used in tar_entry().
typeflag(Flag) ->
    case Flag of
        ?TYPE_REGULAR    -> regular;
        ?TYPE_REGULAR_A  -> regular;
        ?TYPE_GNU_SPARSE -> regular;
        ?TYPE_CONT       -> regular;
        ?TYPE_LINK       -> link;
        ?TYPE_SYMLINK    -> symlink;
        ?TYPE_CHAR       -> char;
        ?TYPE_BLOCK      -> block;
        ?TYPE_DIR        -> directory;
        ?TYPE_FIFO       -> fifo;
        _                -> unknown
    end.

%%%================================================================
%% Commands for printing the contents of a tape archive,
%% meant to be invoked from the shell.

%% Prints each filename in the archive
-spec t(file:filename()) -> ok | {error, term()}.
t(Name) when is_list(Name); is_binary(Name) ->
    case table(Name) of
        {ok, Names} ->
            PrintName = fun(N) -> ok = io:format("~ts\n", [N]) end,
            lists:foreach(PrintName, Names);
        Error ->
            Error
    end.
%% Prints verbose information about each file in the archive
-spec tt(open_type()) -> ok | {error, term()}.
tt(Name) ->
    case table(Name, [verbose]) of
        {ok, List} ->
            lists:foreach(fun print_header/1, List);
        Error ->
            Error
    end.

%% Used by tt/1 to print a tar_entry tuple.
%% The member name is printed with ~ts (not ~s) so that names containing
%% code points above 255 do not make io:format/2 fail; t/1 does the same.
-spec print_header(tar_entry()) -> ok.
print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) ->
    io:format("~s~s ~4w/~-4w ~7w ~s ~ts\n",
              [type_to_string(Type), mode_to_string(Mode),
               Uid, Gid, Size, time_to_string(Mtime), Name]).

%% One-character tag for the entry type, shown in the first output column.
type_to_string(regular)   -> "-";
type_to_string(directory) -> "d";
type_to_string(link)      -> "l";
type_to_string(symlink)   -> "s";
type_to_string(char)      -> "c";
type_to_string(block)     -> "b";
type_to_string(fifo)      -> "f";
type_to_string(unknown)   -> "?".

%% Converts a numeric mode to its human-readable representation.
%% The nine permission bits are consumed from the least significant bit, so
%% the template is written backwards and the result is built by prepending.
mode_to_string(Mode) ->
    mode_to_string(Mode, "xwrxwrxwr", []).
mode_to_string(Mode, [C|T], Acc) when Mode band 1 =:= 1 ->
    mode_to_string(Mode bsr 1, T, [C|Acc]);
mode_to_string(Mode, [_|T], Acc) ->
    mode_to_string(Mode bsr 1, T, [$-|Acc]);
mode_to_string(_, [], Acc) ->
    Acc.

%% Converts a tar_time() (POSIX time) to a readable string in local time.
time_to_string(Secs0) ->
    Epoch = calendar:datetime_to_gregorian_seconds(?EPOCH),
    Secs = Epoch + Secs0,
    DateTime0 = calendar:gregorian_seconds_to_datetime(Secs),
    DateTime = calendar:universal_time_to_local_time(DateTime0),
    {{Y, Mon, Day}, {H, Min, _}} = DateTime,
    io_lib:format("~s ~2w ~s:~s ~w", [month(Mon), Day, two_d(H), two_d(Min), Y]).

%% Two-digit, zero-padded rendering of N (N + 100 is printed without its
%% leading digit).
two_d(N) ->
    tl(integer_to_list(N + 100)).

%% Abbreviated English month name for a month number 1..12.
month(1) -> "Jan";
month(2) -> "Feb";
month(3) -> "Mar";
month(4) -> "Apr";
month(5) -> "May";
month(6) -> "Jun";
month(7) -> "Jul";
month(8) -> "Aug";
month(9) -> "Sep";
month(10) -> "Oct";
month(11) -> "Nov";
month(12) -> "Dec".
%%%================================================================
%% The open function with friends is to keep the file and binary api of this module
-type open_type() :: file:filename_all()
                   | {binary, binary()}
                   | {file, file:io_device()}.
-spec open(Open :: open_type(), [write | compressed | cooked]) ->
          {ok, tar_descriptor()} | {error, term()}.
open({binary, Bin}=Handle, Mode) when is_binary(Bin) ->
    do_open(Handle, Mode);
open({file, _Fd}=Handle, Mode) ->
    do_open(Handle, Mode);
open(Name, Mode) when is_list(Name); is_binary(Name) ->
    do_open(Name, Mode).

%% Parses the mode list and opens the archive; a bad mode is reported
%% together with the name/handle it was given for.
do_open(Name, Mode) when is_list(Mode) ->
    case open_mode(Mode) of
        {error, Reason} ->
            {error, {Name, Reason}};
        {ok, Access, Raw, Opts} ->
            open1(Name, Access, Raw, Opts)
    end.

%% Opens an in-memory binary, an already opened file descriptor, or a file
%% by name, and wraps the result in a #reader{}.
open1({binary,Bin0}=Handle, read, _Raw, Opts) when is_binary(Bin0) ->
    Bin = maybe_gunzip(Bin0, lists:member(compressed, Opts)),
    case file:open(Bin, [ram,binary,read]) of
        {ok,File} ->
            {ok, #reader{handle=File,access=read,func=fun file_op/2}};
        {error, Reason} ->
            {error, {Handle, Reason}}
    end;
open1({file, Fd}=Handle, read, [raw], Opts) ->
    case lists:member(compressed, Opts) of
        true ->
            {error, {Handle, {incompatible_option, compressed}}};
        false ->
            Reader = #reader{handle=Fd,access=read,func=fun file_op/2},
            case do_position(Reader, {cur, 0}) of
                {ok, Pos, Reader2} ->
                    {ok, Reader2#reader{pos=Pos}};
                {error, Reason} ->
                    {error, {Handle, Reason}}
            end
    end;
open1({file, _Fd}=Handle, read, [], _Opts) ->
    {error, {Handle, {incompatible_option, cooked}}};
open1(Name, Access, Raw, Opts) when is_list(Name); is_binary(Name) ->
    case file:open(Name, Raw ++ [binary, Access|Opts]) of
        {error, Reason} ->
            {error, {Name, Reason}};
        {ok, File} ->
            {ok, #reader{handle=File,access=Access,func=fun file_op/2}}
    end.

%% Best-effort decompression: if gunzip fails for any reason the data is
%% used as it is.
maybe_gunzip(Bin, false) ->
    Bin;
maybe_gunzip(Bin, true) ->
    try
        zlib:gunzip(Bin)
    catch
        _:_ -> Bin
    end.

%% Translates the user supplied mode into {ok, Access, Raw, FileOpts}.
%% Raw defaults to [raw] and is cleared by `cooked`.
open_mode(Mode) ->
    open_mode(Mode, false, [raw], []).

open_mode(read, _, Raw, _) ->
    {ok, read, Raw, []};
open_mode(write, _, Raw, _) ->
    {ok, write, Raw, []};
open_mode([read|Rest], false, Raw, Opts) ->
    open_mode(Rest, read, Raw, Opts);
open_mode([write|Rest], false, Raw, Opts) ->
    open_mode(Rest, write, Raw, Opts);
open_mode([compressed|Rest], Access, Raw, Opts) ->
    open_mode(Rest, Access, Raw, [compressed,read_ahead|Opts]);
open_mode([cooked|Rest], Access, _Raw, Opts) ->
    open_mode(Rest, Access, [], Opts);
open_mode([], Access, Raw, Opts) ->
    {ok, Access, Raw, Opts};
open_mode(_, _, _, _) ->
    {error, einval}.

%% Default I/O callback, backed by the file module.
file_op(write, {Fd, Data}) ->
    file:write(Fd, Data);
file_op(position, {Fd, Pos}) ->
    file:position(Fd, Pos);
file_op(read2, {Fd, Size}) ->
    file:read(Fd, Size);
file_op(close, Fd) ->
    file:close(Fd).

%% Closes a tar archive.
%% An archive opened for writing is padded with the end-of-archive marker
%% before the handle is closed.
-spec close(TarDescriptor :: tar_descriptor()) -> ok | {error, term()}.
close(#reader{access=read}=Reader) ->
    ok = do_close(Reader);
close(#reader{access=write}=Reader) ->
    {ok, Padded} = pad_file(Reader),
    ok = do_close(Padded);
close(_) ->
    {error, einval}.

pad_file(#reader{pos=Pos}=Reader) ->
    %% There must be at least two zero blocks at the end.
    PadCurrent = skip_padding(Pos+?BLOCK_SIZE),
    Padding = <<0:PadCurrent/unit:8>>,
    do_write(Reader, [Padding, ?ZERO_BLOCK, ?ZERO_BLOCK]).


%%%================================================================
%% Creation/modification of tar archives

%% Creates a tar file Name containing the given files.
-spec create(file:filename_all(), filelist()) -> ok | {error, {string(), term()}}.
create(Name, FileList) when is_list(Name); is_binary(Name) ->
    create(Name, FileList, []).

%% Creates a tar archive Name containing the given files.
%% Accepted options: verbose, compressed, cooked
-spec create(file:filename_all(), filelist(), [create_opt()]) ->
          ok | {error, term()} | {error, {string(), term()}}.
create(Name, FileList, Options) when is_list(Name); is_binary(Name) ->
    %% only `compressed` and `cooked` influence how the archive is opened
    IsOpenOpt = fun(X) -> lists:member(X, [compressed, cooked]) end,
    Mode = lists:filter(IsOpenOpt, Options),
    case open(Name, [write|Mode]) of
        {error, _} = Err ->
            Err;
        {ok, TarFile} ->
            do_create(TarFile, FileList, Options)
    end.

%% Adds every entry of the file list and closes the archive. An entry is
%% either {NameInArchive, NameOrBin} or a plain name that is used both as
%% the source and as the name in the archive. The archive is closed on
%% failure as well, and the error of the failing add is returned.
do_create(TarFile, [], _Opts) ->
    close(TarFile);
do_create(TarFile, [Entry|Rest], Opts) ->
    {NameInArchive, Source} =
        case Entry of
            {ArchiveName, NameOrBin} -> {ArchiveName, NameOrBin};
            Name -> {Name, Name}
        end,
    case add(TarFile, Source, NameInArchive, Opts) of
        ok ->
            do_create(TarFile, Rest, Opts);
        {error, _} = Err ->
            _ = close(TarFile),
            Err
    end.

%% Adds a file to a tape archive.
-type add_type() :: name_in_archive()
                  | {name_in_archive(), file:filename_all()}.
-spec add(TarDescriptor, AddType, Options) -> ok | {error, term()} when
      TarDescriptor :: tar_descriptor(),
      AddType :: add_type(),
      Options :: [add_opt()].
add(Reader, {NameInArchive, Source}, Opts)
  when is_list(NameInArchive), is_list(Source) ->
    %% Source is a file name
    do_add(Reader, Source, NameInArchive, Opts);
add(Reader, {NameInArchive, Source}, Opts)
  when is_list(NameInArchive), is_binary(Source) ->
    %% Source is the file contents
    do_add(Reader, Source, NameInArchive, Opts);
add(Reader, Name, Opts) when is_list(Name) ->
    do_add(Reader, Name, Name, Opts).

-spec add(TarDescriptor, Filename, NameInArchive, Options) ->
          ok | {error, term()} when
      TarDescriptor :: tar_descriptor(),
      Filename :: file:filename_all(),
      NameInArchive :: name_in_archive(),
      Options :: [add_opt()].
%% NOTE: the two clauses accept exactly what the original single guard
%% `is_list(NameOrBin); is_binary(NameOrBin), is_list(NameInArchive),
%% is_list(Options)` accepted: a list NameOrBin is passed on unconditionally,
%% while a binary one also requires list NameInArchive and Options.
add(Reader, NameOrBin, NameInArchive, Options)
  when is_list(NameOrBin) ->
    do_add(Reader, NameOrBin, NameInArchive, Options);
add(Reader, NameOrBin, NameInArchive, Options)
  when is_binary(NameOrBin), is_list(NameInArchive), is_list(Options) ->
    do_add(Reader, NameOrBin, NameInArchive, Options).

%% Adding is only possible on a descriptor opened for writing. By default
%% file information is read with read_link_info; the `dereference` option
%% switches to read_file_info (see add_opts/3).
do_add(#reader{access=write}=Reader, Name, NameInArchive, Options)
  when is_list(NameInArchive), is_list(Options) ->
    ReadInfo = apply_file_info_opts_fun(Options, read_link_info),
    Opts0 = #add_opts{read_info=ReadInfo},
    Opts = add_opts(Options, Options, Opts0),
    add1(Reader, Name, NameInArchive, Opts);
do_add(#reader{access=read}, _, _, _) ->
    {error, eacces};
do_add(Reader, _, _, _) ->
    {error, {badarg, Reader}}.

%% Folds the user options into an #add_opts{} record; unknown options are
%% ignored. AllOptions is the complete option list, needed when the
%% file-info fun has to be rebuilt for `dereference`.
add_opts(Options, AllOptions, Opts0) ->
    lists:foldl(fun(Opt, Opts) -> add_opt(Opt, AllOptions, Opts) end,
                Opts0, Options).

add_opt(dereference, AllOptions, Opts) ->
    ReadInfo = apply_file_info_opts_fun(AllOptions, read_file_info),
    Opts#add_opts{read_info=ReadInfo};
add_opt(verbose, _AllOptions, Opts) ->
    Opts#add_opts{verbose=true};
add_opt({chunks,N}, _AllOptions, Opts) ->
    Opts#add_opts{chunk_size=N};
add_opt({atime,Value}, _AllOptions, Opts) ->
    Opts#add_opts{atime=Value};
add_opt({mtime,Value}, _AllOptions, Opts) ->
    Opts#add_opts{mtime=Value};
add_opt({ctime,Value}, _AllOptions, Opts) ->
    Opts#add_opts{ctime=Value};
add_opt({uid,Value}, _AllOptions, Opts) ->
    Opts#add_opts{uid=Value};
add_opt({gid,Value}, _AllOptions, Opts) ->
    Opts#add_opts{gid=Value};
add_opt(_Unknown, _AllOptions, Opts) ->
    Opts.

%% Applies the overriding options to a successful file-info lookup; any
%% other result is returned untouched.
apply_file_info_opts(Opts, {ok, FileInfo}) ->
    {ok, do_apply_file_info_opts(Opts, FileInfo)};
apply_file_info_opts(_Opts, Other) ->
    Other.
%% Overrides atime/mtime/ctime/uid/gid in a #file_info{} with the values
%% given in the option list; all other options are ignored.
do_apply_file_info_opts(Opts, FileInfo0) ->
    lists:foldl(fun apply_file_info_opt/2, FileInfo0, Opts).

apply_file_info_opt({atime,Value}, FileInfo) ->
    FileInfo#file_info{atime=Value};
apply_file_info_opt({mtime,Value}, FileInfo) ->
    FileInfo#file_info{mtime=Value};
apply_file_info_opt({ctime,Value}, FileInfo) ->
    FileInfo#file_info{ctime=Value};
apply_file_info_opt({uid,Value}, FileInfo) ->
    FileInfo#file_info{uid=Value};
apply_file_info_opt({gid,Value}, FileInfo) ->
    FileInfo#file_info{gid=Value};
apply_file_info_opt(_Other, FileInfo) ->
    FileInfo.

%% Returns a fun that reads file info (posix times) with the given
%% file-module function and then applies the overriding options.
apply_file_info_opts_fun(Options, InfoFunction) ->
    fun(F) ->
            Info = file:InfoFunction(F, [{time, posix}]),
            apply_file_info_opts(Options, Info)
    end.

%% Adds one entry to the archive. The source is either a file name (its
%% type decides what is written) or a binary holding the file contents.
add1(#reader{}=Reader, Name, NameInArchive, #add_opts{read_info=ReadInfo}=Opts)
  when is_list(Name) ->
    Res = case ReadInfo(Name) of
              {error, Reason0} ->
                  {error, {Name, Reason0}};
              {ok, #file_info{type=symlink}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  {ok, Linkname} = file:read_link(Name),
                  LinkHeader = fileinfo_to_header(NameInArchive, Fi, Linkname),
                  add_header(Reader, LinkHeader, Opts);
              {ok, #file_info{type=regular}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  FileHeader = fileinfo_to_header(NameInArchive, Fi, false),
                  {ok, Reader2} = add_header(Reader, FileHeader, Opts),
                  FileSize = FileHeader#tar_header.size,
                  %% the number of bytes copied must equal the header size
                  {ok, FileSize, Reader3} = do_copy(Reader2, Name, Opts),
                  PadLen = skip_padding(FileSize),
                  do_write(Reader3, <<0:PadLen/unit:8>>);
              {ok, #file_info{type=directory}=Fi} ->
                  add_directory(Reader, Name, NameInArchive, Fi, Opts);
              {ok, #file_info{}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  OtherHeader = fileinfo_to_header(NameInArchive, Fi, false),
                  add_header(Reader, OtherHeader, Opts)
          end,
    case Res of
        {error, _Reason} = Err -> Err;
        {ok, _Reader} -> ok;
        ok -> ok
    end;
add1(Reader, Bin, NameInArchive, Opts) when is_binary(Bin) ->
    add_verbose(Opts, "a ~ts~n", [NameInArchive]),
    Now = os:system_time(seconds),
    Size = byte_size(Bin),
    Header = #tar_header{
                name = NameInArchive,
                size = Size,
                typeflag = ?TYPE_REGULAR,
                atime = add_opts_time(Opts#add_opts.atime, Now),
                mtime = add_opts_time(Opts#add_opts.mtime, Now),
                ctime = add_opts_time(Opts#add_opts.ctime, Now),
                uid = Opts#add_opts.uid,
                gid = Opts#add_opts.gid,
                mode = 8#100644},
    {ok, Reader2} = add_header(Reader, Header, Opts),
    PadLen = skip_padding(Size),
    case do_write(Reader2, [Bin, <<0:PadLen/unit:8>>]) of
        {error, Reason} -> {error, {NameInArchive, Reason}};
        {ok, _Reader3} -> ok
    end.

%% Uses the time given as an option, or Now when none was given.
add_opts_time(undefined, Now) -> Now;
add_opts_time(Time, _Now) -> Time.

%% Adds a directory. An empty directory gets a header of its own; for a
%% non-empty one only its contents are added (via add_files/5).
add_directory(Reader, DirName, NameInArchive, Info, Opts) ->
    case file:list_dir(DirName) of
        {error, Reason} ->
            {error, {DirName, Reason}};
        {ok, []} ->
            add_verbose(Opts, "a ~ts~n", [NameInArchive]),
            Header = fileinfo_to_header(NameInArchive, Info, false),
            add_header(Reader, Header, Opts);
        {ok, Files} ->
            add_verbose(Opts, "a ~ts~n", [NameInArchive]),
            try add_files(Reader, Files, DirName, NameInArchive, Opts) of
                ok -> ok;
                {error, _} = Err -> Err
            catch
                %% errors already tagged with a name are passed through,
                %% bare reasons are tagged with the directory name
                throw:{error, {_Name, _Reason}} = Err -> Err;
                throw:{error, Reason} -> {error, {DirName, Reason}}
            end
    end.
%% Adds the listed directory members one by one, threading the reader
%% through when an add returns an updated one. Stops at the first error.
add_files(_Reader, [], _Dir, _DirInArchive, _Opts) ->
    ok;
add_files(Reader, [Name|Rest], Dir, DirInArchive, #add_opts{read_info=Info}=Opts) ->
    FullName = filename:join(Dir, Name),
    NameInArchive = filename:join(DirInArchive, Name),
    Res = case Info(FullName) of
              {error, Reason} ->
                  {error, {FullName, Reason}};
              {ok, #file_info{type=directory}=Fi} ->
                  add_directory(Reader, FullName, NameInArchive, Fi, Opts);
              {ok, #file_info{type=symlink}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  {ok, Linkname} = file:read_link(FullName),
                  LinkHeader = fileinfo_to_header(NameInArchive, Fi, Linkname),
                  add_header(Reader, LinkHeader, Opts);
              {ok, #file_info{type=regular}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  FileHeader = fileinfo_to_header(NameInArchive, Fi, false),
                  {ok, Reader2} = add_header(Reader, FileHeader, Opts),
                  FileSize = FileHeader#tar_header.size,
                  %% the number of bytes copied must equal the header size
                  {ok, FileSize, Reader3} = do_copy(Reader2, FullName, Opts),
                  PadLen = skip_padding(FileSize),
                  do_write(Reader3, <<0:PadLen/unit:8>>);
              {ok, #file_info{}=Fi} ->
                  add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                  OtherHeader = fileinfo_to_header(NameInArchive, Fi, false),
                  add_header(Reader, OtherHeader, Opts)
          end,
    case Res of
        {error, _} = Err -> Err;
        {ok, ReaderNext} -> add_files(ReaderNext, Rest, Dir, DirInArchive, Opts);
        ok -> add_files(Reader, Rest, Dir, DirInArchive, Opts)
    end.

%% Formats a string for a fixed-size header field. A NUL terminator is
%% appended only when the field has room for it.
format_string(String, Size) when length(String) > Size ->
    throw({error, {write_string, field_too_long}});
format_string(String, Size) ->
    Ascii = to_ascii(String),
    case byte_size(Ascii) < Size of
        true  -> [Ascii, 0];
        false -> Ascii
    end.

%% Renders an integer in base 8 as a binary.
format_octal(Octal) ->
    integer_to_binary(Octal, 8).

%% Builds the header block(s) for Header and writes them to the archive.
add_header(#reader{}=Reader, #tar_header{}=Header, Opts) ->
    {ok, HeaderData} = build_header(Header, Opts),
    do_write(Reader, HeaderData).
%% Overwrites part of Block, starting at byte offset Start, with the given
%% data. The block keeps its size; the data must fit inside it.
write_to_block(Block, IoData, Start) when is_list(IoData) ->
    write_to_block(Block, iolist_to_binary(IoData), Start);
write_to_block(Block, Bin, Start) when is_binary(Bin) ->
    Len = byte_size(Bin),
    <<Before:Start/binary, _Old:Len/binary, After/binary>> = Block,
    <<Before/binary, Bin/binary, After/binary>>.

%% Builds the on-disk representation of a header. Fields are written into
%% a zeroed block one at a time; values that cannot be stored in their
%% fixed-size field are collected in a map of PAX attributes, and if any
%% remain a PAX extended header entry is emitted in front of the block.
build_header(#tar_header{}=Header, Opts) ->
    #tar_header{
       name=Name,
       mode=Mode,
       uid=Uid,
       gid=Gid,
       size=Size,
       mtime=Mtime,
       typeflag=Type,
       linkname=Linkname,
       uname=Uname,
       gname=Gname,
       devmajor=Devmaj,
       devminor=Devmin
      } = Header,

    B0 = ?ZERO_BLOCK,
    P0 = #{},
    %% V7 fields
    {B1, P1} = write_string(B0, ?V7_NAME, ?V7_NAME_LEN, Name, ?PAX_PATH, P0),
    B2 = write_octal(B1, ?V7_MODE, ?V7_MODE_LEN, Mode),
    {B3, P3} = write_numeric(B2, ?V7_UID, ?V7_UID_LEN, Uid, ?PAX_UID, P1),
    {B4, P4} = write_numeric(B3, ?V7_GID, ?V7_GID_LEN, Gid, ?PAX_GID, P3),
    {B5, P5} = write_numeric(B4, ?V7_SIZE, ?V7_SIZE_LEN, Size, ?PAX_SIZE, P4),
    {B6, P6} = write_numeric(B5, ?V7_MTIME, ?V7_MTIME_LEN, Mtime, ?PAX_NONE, P5),
    {B7, P7} = write_string(B6, ?V7_TYPE, ?V7_TYPE_LEN, <<Type>>, ?PAX_NONE, P6),
    {B8, P8} = write_string(B7, ?V7_LINKNAME, ?V7_LINKNAME_LEN,
                            Linkname, ?PAX_LINKPATH, P7),
    %% USTAR fields
    {B9, P9} = write_string(B8, ?USTAR_UNAME, ?USTAR_UNAME_LEN,
                            Uname, ?PAX_UNAME, P8),
    {B10, P10} = write_string(B9, ?USTAR_GNAME, ?USTAR_GNAME_LEN,
                              Gname, ?PAX_GNAME, P9),
    {B11, P11} = write_numeric(B10, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN,
                               Devmaj, ?PAX_NONE, P10),
    {B12, P12} = write_numeric(B11, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN,
                               Devmin, ?PAX_NONE, P11),
    %% a long name may still fit when split into ustar prefix + name
    {B13, Pax} = set_path(B12, P12),
    PaxEntry = case maps:size(Pax) =:= 0 of
                   true  -> [];
                   false -> build_pax_entry(Header, Pax, Opts)
               end,
    B14 = set_format(B13, ?FORMAT_USTAR),
    %% the checksum is computed last, over the otherwise finished block
    B15 = set_checksum(B14),
    {ok, [PaxEntry, B15]}.

set_path(Block0, Pax) ->
     %% only use ustar header when name is too long
    case maps:get(?PAX_PATH, Pax, nil) of
        nil ->
            {Block0, Pax};
        PaxPath ->
            case split_ustar_path(PaxPath) of
                false ->
                    {Block0, Pax};
                {ok, UstarName, UstarPrefix} ->
                    {Block1, _} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN,
                                               UstarName, ?PAX_NONE, #{}),
                    {Block2, _} = write_string(Block1, ?USTAR_PREFIX, ?USTAR_PREFIX_LEN,
                                               UstarPrefix, ?PAX_NONE, #{}),
                    %% the path now lives in the header; no PAX record needed
                    {Block2, maps:remove(?PAX_PATH, Pax)}
            end
    end.

%% Writes the ustar magic and version into the block. Only the USTAR and
%% PAX formats are accepted; anything else is thrown as an error.
set_format(Block0, Format)
  when Format =:= ?FORMAT_USTAR; Format =:= ?FORMAT_PAX ->
    WithMagic = write_to_block(Block0, ?MAGIC_USTAR, ?USTAR_MAGIC),
    write_to_block(WithMagic, ?VERSION_USTAR, ?USTAR_VERSION);
set_format(_Block, Format) ->
    throw({error, {invalid_format, Format}}).

%% Computes the checksum of the block and stores it in the checksum field.
set_checksum(Block) ->
    write_octal(Block, ?V7_CHKSUM, ?V7_CHKSUM_LEN, compute_checksum(Block)).
%% Builds a complete PAX extended header entry: a header block of type
%% ?TYPE_X_HEADER named "<dir>/PaxHeaders.0/<file>", followed by the
%% attribute records and zero padding up to a block boundary.
build_pax_entry(Header, PaxAttrs, Opts) ->
    Path = Header#tar_header.name,
    Filename = filename:basename(Path),
    Dir = filename:dirname(Path),
    Path2 = filename:join([Dir, "PaxHeaders.0", Filename]),
    AsciiPath = to_ascii(Path2),
    %% the name of the PAX entry itself must fit in the V7 name field
    Path3 = if byte_size(AsciiPath) > ?V7_NAME_LEN ->
                    binary_part(AsciiPath, 0, ?V7_NAME_LEN - 1);
               true ->
                    AsciiPath
            end,
    Keys = maps:keys(PaxAttrs),
    SortedKeys = lists:sort(Keys),
    PaxFile = build_pax_file(SortedKeys, PaxAttrs),
    Size = byte_size(PaxFile),
    Padding = (?BLOCK_SIZE -
                   (byte_size(PaxFile) rem ?BLOCK_SIZE)) rem ?BLOCK_SIZE,
    Pad = <<0:Padding/unit:8>>,
    PaxHeader = #tar_header{
                   name=unicode:characters_to_list(Path3),
                   size=Size,
                   mtime=Header#tar_header.mtime,
                   atime=Header#tar_header.atime,
                   ctime=Header#tar_header.ctime,
                   typeflag=?TYPE_X_HEADER
                  },
    {ok, PaxHeaderData} = build_header(PaxHeader, Opts),
    [PaxHeaderData, PaxFile, Pad].

%% Renders the attributes named by Keys (in that order) as one binary of
%% PAX records, each of the form "<length> <key>=<value>\n".
build_pax_file(Keys, PaxAttrs) ->
    Records = [pax_record(to_string(K), to_string(maps:get(K, PaxAttrs)))
               || K <- Keys],
    iolist_to_binary(Records).

%% Builds a single PAX record. <length> is the size in BYTES of the whole
%% record, including the length digits themselves. It is derived from the
%% UTF-8 encoded body, so multi-byte characters in the key or value are
%% counted correctly (length/1 on a character list would count characters).
pax_record(Key, Value) ->
    Body = unicode:characters_to_binary([$\s, Key, $=, Value, $\n]),
    BodyLen = byte_size(Body),
    %% Adding the digits can itself add a digit (e.g. 98 + 2 = 100), so the
    %% digit count is taken from a first estimate. One refinement is enough:
    %% an estimate that just gained a digit lies right above a power of ten,
    %% and adding one more to it cannot gain another digit.
    Estimate = BodyLen + sizeof(BodyLen),
    Len = BodyLen + sizeof(Estimate),
    [integer_to_binary(Len), Body].

%% Size of a term as it appears in a PAX record: bytes for binaries,
%% elements for lists, and the length of the textual form for numbers.
sizeof(Bin) when is_binary(Bin) ->
    byte_size(Bin);
sizeof(List) when is_list(List) ->
    length(List);
sizeof(N) when is_integer(N) ->
    byte_size(integer_to_binary(N));
sizeof(N) when is_float(N) ->
    byte_size(float_to_binary(N)).
%% Converts a PAX key or value to a character list.
to_string(Bin) when is_binary(Bin) ->
    unicode:characters_to_list(Bin);
to_string(List) when is_list(List) ->
    List;
to_string(N) when is_integer(N) ->
    integer_to_list(N);
to_string(N) when is_float(N) ->
    float_to_list(N).

%% Tries to split a path that is too long for the V7 name field into a
%% ustar {ok, Name, Prefix} pair (both binaries). Returns false when the
%% path fits the name field anyway, is not ASCII, or cannot be split so
%% that both parts fit their fields.
split_ustar_path(Path) ->
    Len = length(Path),
    NotAscii = not is_ascii(Path),
    if Len =< ?V7_NAME_LEN; NotAscii ->
            false;
       true ->
            PathBin = binary:list_to_bin(Path),
            case binary:split(PathBin, [<<$/>>], [global, trim_all]) of
                [] ->
                    %% the path consisted of separators only; there is no
                    %% name component to put in the header
                    false;
                [Part] when byte_size(Part) >= ?V7_NAME_LEN ->
                    false;
                Parts ->
                    case lists:last(Parts) of
                        Name when byte_size(Name) >= ?V7_NAME_LEN ->
                            false;
                        Name ->
                            Parts2 = lists:sublist(Parts, length(Parts) - 1),
                            join_split_ustar_path(Parts2, {ok, Name, nil})
                    end
            end
    end.

%% Joins the leading path components with "/" into the ustar prefix.
%% Gives up (false) as soon as the prefix would not fit its field.
join_split_ustar_path([], Acc) ->
    Acc;
join_split_ustar_path([Part|_], {ok, _, nil})
  when byte_size(Part) > ?USTAR_PREFIX_LEN ->
    false;
join_split_ustar_path([Part|_], {ok, _Name, Acc})
  %% + 1 accounts for the "/" inserted between Acc and Part; without it a
  %% prefix one byte longer than the field could be produced
  when (byte_size(Part)+byte_size(Acc)+1) > ?USTAR_PREFIX_LEN ->
    false;
join_split_ustar_path([Part|Rest], {ok, Name, nil}) ->
    join_split_ustar_path(Rest, {ok, Name, Part});
join_split_ustar_path([Part|Rest], {ok, Name, Acc}) ->
    join_split_ustar_path(Rest, {ok, Name, <<Acc/binary,$/,Part/binary>>}).

%% Writes X as zero-padded octal into the field at Pos. The value must
%% leave at least one byte of the field unused, otherwise an error is thrown.
write_octal(Block, Pos, Size, X) ->
    Octal = zero_pad(format_octal(X), Size-1),
    if byte_size(Octal) < Size ->
            write_to_block(Block, Octal, Pos);
       true ->
            throw({error, {write_failed, octal_field_too_long}})
    end.

%% Writes a string field. When the field has a PAX attribute and the value
%% is too long or not ASCII, the block is left untouched and the value is
%% recorded in the PAX map instead. Returns {Block, Pax}.
write_string(Block, Pos, Size, Str, PaxAttr, Pax0) ->
    NotAscii = not is_ascii(Str),
    if PaxAttr =/= ?PAX_NONE andalso (length(Str) > Size orelse NotAscii) ->
            Pax1 = maps:put(PaxAttr, Str, Pax0),
            {Block, Pax1};
       true ->
            Formatted = format_string(Str, Size),
            {write_to_block(Block, Formatted, Pos), Pax0}
    end.
%% Writes a numeric field as NUL-terminated, zero-padded octal. A value
%% that does not fit is recorded in the PAX map when the field has a PAX
%% attribute; otherwise an error is thrown. Returns {Block, Pax}.
write_numeric(Block, Pos, Size, X, PaxAttr, Pax0) ->
    %% attempt octal
    Octal = zero_pad(format_octal(X), Size-1),
    Fits = byte_size(Octal) < Size,
    if Fits ->
            {write_to_block(Block, [Octal, 0], Pos), Pax0};
       PaxAttr =/= ?PAX_NONE ->
            {Block, maps:put(PaxAttr, X, Pax0)};
       true ->
            throw({error, {write_failed, numeric_field_too_long}})
    end.

%% Left-pads a binary with the character "0" up to Size bytes; a binary
%% that is already long enough is returned unchanged.
zero_pad(Str, Size) when byte_size(Str) >= Size ->
    Str;
zero_pad(Str, Size) ->
    Zeros = binary:copy(<<$0>>, Size - byte_size(Str)),
    <<Zeros/binary, Str/binary>>.


%%%================================================================
%% Functions for reading tar archives

%% Reads the next block. Returns {ok, Block, Reader}, or eof when the
%% end-of-archive marker is found; every failure is thrown.
read_block(Reader) ->
    case do_read(Reader, ?BLOCK_SIZE) of
        {error, _} = Err ->
            throw(Err);
        eof ->
            throw({error, eof});
        %% Two zero blocks mark the end of the archive
        {ok, ?ZERO_BLOCK, Reader1} ->
            read_end_marker(Reader1);
        {ok, Block, Reader1} when is_binary(Block) ->
            {ok, Block, Reader1}
    end.

%% Called after one zero block has been read: checks what follows it.
read_end_marker(Reader) ->
    case do_read(Reader, ?BLOCK_SIZE) of
        eof ->
            % This is technically a malformed end-of-archive marker,
            % as two ZERO_BLOCKs are expected as the marker,
            % but if we've already made it this far, we should just ignore it
            eof;
        {ok, ?ZERO_BLOCK, _Reader2} ->
            eof;
        {ok, _Block, _Reader2} ->
            throw({error, invalid_end_of_archive});
        {error,_} = Err ->
            throw(Err)
    end.

%% Reads the next header block and converts it, or returns eof at the end
%% of the archive.
get_header(#reader{}=Reader) ->
    case read_block(Reader) of
        {ok, Block, Reader1} ->
            convert_header(Block, Reader1);
        eof ->
            eof
    end.

%% Converts the tar header to a record.
%% Slices the raw fields of a V7 header out of a ?BLOCK_SIZE-byte block.
%% Field values are left unparsed (binaries), except for the type flag,
%% which is a single byte. Any other input yields
%% {error, header_block_too_small}.
to_v7(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Field = fun(Offset, Len) -> binary_part(Bin, Offset, Len) end,
    #header_v7{
       name=Field(?V7_NAME, ?V7_NAME_LEN),
       mode=Field(?V7_MODE, ?V7_MODE_LEN),
       uid=Field(?V7_UID, ?V7_UID_LEN),
       gid=Field(?V7_GID, ?V7_GID_LEN),
       size=Field(?V7_SIZE, ?V7_SIZE_LEN),
       mtime=Field(?V7_MTIME, ?V7_MTIME_LEN),
       checksum=Field(?V7_CHKSUM, ?V7_CHKSUM_LEN),
       typeflag=binary:at(Bin, ?V7_TYPE),
       linkname=Field(?V7_LINKNAME, ?V7_LINKNAME_LEN)
      };
to_v7(_) ->
    {error, header_block_too_small}.

%% Slices the GNU-specific raw fields out of a header block and wraps
%% the already extracted V7 header. The sparse area (four 24-byte
%% entries plus one flag byte, starting at offset 386) is converted
%% with to_sparse_array/1.
to_gnu(#header_v7{}=V7, Bin)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Field = fun(Offset, Len) -> binary_part(Bin, Offset, Len) end,
    #header_gnu{
       header_v7=V7,
       magic=Field(?GNU_MAGIC, ?GNU_MAGIC_LEN),
       version=Field(?GNU_VERSION, ?GNU_VERSION_LEN),
       uname=Field(265, 32),
       gname=Field(297, 32),
       devmajor=Field(329, 8),
       devminor=Field(337, 8),
       atime=Field(345, 12),
       ctime=Field(357, 12),
       sparse=to_sparse_array(Field(386, 24*4+1)),
       real_size=Field(483, 12)
      }.

%% Slices the STAR-specific raw fields out of a header block and wraps
%% the already extracted V7 header.
to_star(#header_v7{}=V7, Bin)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Field = fun(Offset, Len) -> binary_part(Bin, Offset, Len) end,
    #header_star{
       header_v7=V7,
       magic=Field(?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
       version=Field(?USTAR_VERSION, ?USTAR_VERSION_LEN),
       uname=Field(?USTAR_UNAME, ?USTAR_UNAME_LEN),
       gname=Field(?USTAR_GNAME, ?USTAR_GNAME_LEN),
       devmajor=Field(?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
       devminor=Field(?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
       prefix=Field(345, 131),
       atime=Field(476, 12),
       ctime=Field(488, 12),
       trailer=Field(?STAR_TRAILER, ?STAR_TRAILER_LEN)
      }.
%% Slices the USTAR-specific raw fields out of a header block and wraps
%% the already extracted V7 header.
to_ustar(#header_v7{}=V7, Bin)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Field = fun(Offset, Len) -> binary_part(Bin, Offset, Len) end,
    #header_ustar{
       header_v7=V7,
       magic=Field(?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
       version=Field(?USTAR_VERSION, ?USTAR_VERSION_LEN),
       uname=Field(?USTAR_UNAME, ?USTAR_UNAME_LEN),
       gname=Field(?USTAR_GNAME, ?USTAR_GNAME_LEN),
       devmajor=Field(?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
       devminor=Field(?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
       prefix=Field(345, 155)
      }.

%% Converts a raw sparse area into a #sparse_array{}.
%% Bin holds as many whole 24-byte entries as fit, followed by a flag
%% byte; is_extended is true when that flag byte equals 1.
to_sparse_array(Bin) when is_binary(Bin) ->
    EntryCount = byte_size(Bin) div 24,
    ExtendedFlag = binary:at(Bin, 24*EntryCount),
    #sparse_array{
       entries=parse_sparse_entries(Bin, EntryCount-1, []),
       max_entries=EntryCount,
       is_extended=(1 =:= ExtendedFlag)
      }.

%% Collects the non-empty sparse entries with index N down to 0.
%% Walking backwards while prepending keeps the entries in file order.
parse_sparse_entries(<<>>, _, Acc) ->
    Acc;
parse_sparse_entries(_, -1, Acc) ->
    Acc;
parse_sparse_entries(Bin, N, Acc) ->
    RawEntry = binary_part(Bin, N*24, 24),
    case to_sparse_entry(RawEntry) of
        #sparse_entry{} = Entry ->
            parse_sparse_entries(Bin, N-1, [Entry|Acc]);
        nil ->
            parse_sparse_entries(Bin, N-1, Acc)
    end.

-define(EMPTY_ENTRY, <<0,0,0,0,0,0,0,0,0,0,0,0>>).
%% Converts one 24-byte raw entry (12-byte offset followed by 12-byte
%% length) into a #sparse_entry{}. An entry whose two fields are all
%% zero bytes is unused and yields `nil`.
to_sparse_entry(<<OffsetBin:12/binary, NumBytesBin:12/binary>>) ->
    case {OffsetBin, NumBytesBin} of
        {?EMPTY_ENTRY, ?EMPTY_ENTRY} ->
            nil;
        _ ->
            #sparse_entry{
               offset=parse_numeric(OffsetBin),
               num_bytes=parse_numeric(NumBytesBin)}
    end.

-spec get_format(binary()) -> {ok, pos_integer(), header_v7()}
                                  | ?FORMAT_UNKNOWN
                                  | {error, term()}.
%% Determines the header format of a ?BLOCK_SIZE-byte header block.
get_format(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    V7 = to_v7(Bin),
    do_get_format(V7, Bin).
%% Verifies the header checksum and guesses the header format.
%% An {error, _} from to_v7/1 is passed through. When the stored checksum
%% matches neither the unsigned nor the signed computed checksum the
%% result is ?FORMAT_UNKNOWN; otherwise {ok, Format, V7}.
do_get_format({error, _} = Err, _Bin) ->
    Err;
do_get_format(#header_v7{checksum=RawChecksum}=V7, Bin)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Stored = parse_octal(RawChecksum),
    Unsigned = compute_checksum(Bin),
    Signed = compute_signed_checksum(Bin),
    case (Stored =:= Unsigned) orelse (Stored =:= Signed) of
        false ->
            ?FORMAT_UNKNOWN;
        true ->
            %% guess the format from the magic, version and trailer fields
            #header_ustar{magic=Magic, version=Version} = to_ustar(V7, Bin),
            #header_star{trailer=Trailer} = to_star(V7, Bin),
            Format = if
                         Magic =:= ?MAGIC_USTAR, Trailer =:= ?TRAILER_STAR ->
                             ?FORMAT_STAR;
                         Magic =:= ?MAGIC_USTAR ->
                             ?FORMAT_USTAR;
                         Magic =:= ?MAGIC_GNU, Version =:= ?VERSION_GNU ->
                             ?FORMAT_GNU;
                         true ->
                             ?FORMAT_V7
                     end,
            {ok, Format, V7}
    end.

%% Builds a #tar_header{} from the raw V7 header (plus the format specific
%% fields for formats newer than V7) and pairs it with a reader for the
%% entry's data. Header-only entry types get size 0. GNU sparse entries
%% get their real size and a sparse file reader; all other entries get a
%% regular file reader.
unpack_format(Format, #header_v7{}=V7, Bin, Reader)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    Mtime = parse_numeric(V7#header_v7.mtime),
    %% atime and ctime default to mtime
    Base = #tar_header{
              name=parse_string(V7#header_v7.name),
              mode=parse_numeric(V7#header_v7.mode),
              uid=parse_numeric(V7#header_v7.uid),
              gid=parse_numeric(V7#header_v7.gid),
              size=parse_numeric(V7#header_v7.size),
              mtime=Mtime,
              atime=Mtime,
              ctime=Mtime,
              typeflag=V7#header_v7.typeflag,
              linkname=parse_string(V7#header_v7.linkname)
             },
    Typeflag = Base#tar_header.typeflag,
    Named = case Format > ?FORMAT_V7 of
                true ->
                    unpack_modern(Format, V7, Bin, Base);
                false ->
                    PlainName = Base#tar_header.name,
                    Base#tar_header{name=safe_join_path("", PlainName)}
            end,
    Sized = case is_header_only_type(Typeflag) of
                true -> Named#tar_header{size=0};
                false -> Named
            end,
    case Typeflag =:= ?TYPE_GNU_SPARSE of
        true ->
            Gnu = to_gnu(V7, Bin),
            RealSize = parse_numeric(Gnu#header_gnu.real_size),
            {Sparsemap, Reader2} = parse_sparse_map(Gnu, Reader),
            SparseHeader = Sized#tar_header{size=RealSize},
            {SparseHeader, new_sparse_file_reader(Reader2, Sparsemap, RealSize)};
        false ->
            EntrySize = Sized#tar_header.size,
            FileReader = #reg_file_reader{
                            handle=Reader,
                            num_bytes=EntrySize,
                            size=EntrySize,
                            pos = 0
                           },
            {Sized, FileReader}
    end.

%% Fills in the fields available in formats newer than V7: user and
%% group names, device numbers for character/block devices, the name
%% prefix (USTAR and STAR) and access/change times (STAR only).
unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{typeflag=Typeflag}=Header0)
  when is_binary(Bin) ->
    Ustar = to_ustar(V7, Bin),
    WithNames = Header0#tar_header{
                  uname=parse_string(Ustar#header_ustar.uname),
                  gname=parse_string(Ustar#header_ustar.gname)},
    IsDevice = lists:member(Typeflag, [?TYPE_CHAR, ?TYPE_BLOCK]),
    WithDevice = case IsDevice of
                     true ->
                         Major = parse_numeric(Ustar#header_ustar.devmajor),
                         Minor = parse_numeric(Ustar#header_ustar.devminor),
                         WithNames#tar_header{
                           devmajor=Major,
                           devminor=Minor
                          };
                     false ->
                         WithNames
                 end,
    {Prefix, WithTimes} =
        case Format of
            ?FORMAT_USTAR ->
                {parse_string(Ustar#header_ustar.prefix), WithDevice};
            ?FORMAT_STAR ->
                Star = to_star(V7, Bin),
                StarPrefix = parse_string(Star#header_star.prefix),
                Atime = parse_numeric(Star#header_star.atime),
                Ctime = parse_numeric(Star#header_star.ctime),
                {StarPrefix, WithDevice#tar_header{
                               atime=Atime,
                               ctime=Ctime
                              }};
            _ ->
                {"", WithDevice}
        end,
    FullName = safe_join_path(Prefix, WithTimes#tar_header.name),
    WithTimes#tar_header{name=FullName}.


%% Joins Prefix and Name with filename:join, skipping whichever of the
%% two is empty.
safe_join_path(Prefix, Name) ->
    case {Prefix, Name} of
        {[], _} -> filename:join([Name]);
        {_, []} -> filename:join([Prefix]);
        _ -> filename:join(Prefix, Name)
    end.

%% Creates a reader for a sparse file of RealSize bytes described by
%% Sparsemap. The map is validated first; invalid maps make
%% validate_sparse_entries/2 throw.
new_sparse_file_reader(Reader, Sparsemap, RealSize) ->
    true = validate_sparse_entries(Sparsemap, RealSize),
    #sparse_file_reader{
       handle = Reader,
       num_bytes = RealSize,
       pos = 0,
       size = RealSize,
       sparse_map = Sparsemap}.
%% Checks that the sparse entries are consistent with a file of RealSize
%% bytes: no entry may overflow ?MAX_INT64 or extend past RealSize, and
%% each entry must start at or after the end of the previous one.
%% Returns true, or throws {error, {invalid_sparse_map_entry, Why}}.
validate_sparse_entries(Entries, RealSize) ->
    validate_sparse_entries(Entries, RealSize, 0, 0).
validate_sparse_entries([], _RealSize, _Index, _PrevEnd) ->
    true;
validate_sparse_entries([#sparse_entry{offset=Offset, num_bytes=NumBytes}|Rest],
                        RealSize, Index, PrevEnd) ->
    if
        Offset > ?MAX_INT64-NumBytes ->
            throw({error, {invalid_sparse_map_entry, offset_too_large}});
        Offset+NumBytes > RealSize ->
            throw({error, {invalid_sparse_map_entry, offset_too_large}});
        Index > 0 andalso PrevEnd > Offset ->
            throw({error, {invalid_sparse_map_entry, overlapping_offsets}});
        true ->
            ok
    end,
    validate_sparse_entries(Rest, RealSize, Index+1, Offset+NumBytes).


-spec parse_sparse_map(header_gnu(), descriptor_type()) ->
                              {[sparse_entry()], descriptor_type()}.
%% Returns the sparse map of a GNU sparse header together with the
%% reader positioned after any extension blocks. When the header is
%% flagged as extended, further blocks are read and the combined entries
%% are sorted by offset; otherwise the header's own entries are returned
%% as they are.
parse_sparse_map(#header_gnu{sparse=#sparse_array{is_extended=true}=Sparse}, Reader) ->
    parse_sparse_map(Sparse, Reader, []);
parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) ->
    {Sparse#sparse_array.entries, Reader}.
parse_sparse_map(#sparse_array{is_extended=true,entries=Entries}, Reader, Acc) ->
    case read_block(Reader) of
        {ok, Block, Reader2} ->
            NextArray = to_sparse_array(Block),
            parse_sparse_map(NextArray, Reader2, Entries++Acc);
        eof ->
            throw({error, eof})
    end;
parse_sparse_map(#sparse_array{entries=Entries}, Reader, Acc) ->
    ByOffset = fun (#sparse_entry{offset=A}, #sparse_entry{offset=B}) ->
                       A =< B
               end,
    {lists:sort(ByOffset, Entries++Acc), Reader}.
%% The unsigned header checksum: the sum of the unsigned byte values of
%% the header block, with the bytes of the checksum field itself counted
%% as ASCII spaces.
compute_checksum(<<Before:?V7_CHKSUM/binary,
                   ChkField:?V7_CHKSUM_LEN/binary,
                   After:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
                   _/binary>>) ->
    Spaces = byte_size(ChkField) * $\s,
    checksum(Before) + Spaces + checksum(After).

%% Same as compute_checksum/1, but the header bytes are summed as signed
%% values.
compute_signed_checksum(<<Before:?V7_CHKSUM/binary,
                          ChkField:?V7_CHKSUM_LEN/binary,
                          After:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
                          _/binary>>) ->
    Spaces = byte_size(ChkField) * $\s,
    signed_checksum(Before) + Spaces + signed_checksum(After).

%% Returns the sum of the bytes of a binary, each taken as unsigned.
checksum(Bin) -> checksum(Bin, 0).
checksum(<<>>, Total) ->
    Total;
checksum(<<Byte:8/unsigned-integer, Tail/binary>>, Total) ->
    checksum(Tail, Total+Byte).

%% Returns the sum of the bytes of a binary, each taken as signed.
signed_checksum(Bin) -> signed_checksum(Bin, 0).
signed_checksum(<<>>, Total) ->
    Total;
signed_checksum(<<Byte:8/signed-integer, Tail/binary>>, Total) ->
    signed_checksum(Tail, Total+Byte).

-spec parse_numeric(binary()) -> non_neg_integer().
%% Parses a numeric header field.
%% An empty field is 0. If the high bit of the first byte is set, the
%% field is in base-256 format: the remaining bits form a two's complement
%% number in big-endian byte order. Otherwise the field is an octal
%% string. Throws {error, integer_overflow} when a base-256 value does
%% not fit in a signed 64-bit integer.
parse_numeric(<<>>) ->
    0;
parse_numeric(<<First, _/binary>> = Bin) when First band 16#80 =/= 0 ->
    %% Handling negative numbers relies on the following identity:
    %%     -a-1 == ^a
    %% Bit 16#40 of the first byte is the sign bit. If the number is
    %% negative, an inversion mask of 16#FF inverts the data bytes so the
    %% value can be accumulated as an unsigned number; positive numbers
    %% use a mask of 16#00 and are accumulated unchanged.
    Inv = if First band 16#40 =/= 0 -> 16#FF; true -> 16#00 end,
    Reducer = fun (C, {I, X}) ->
                      C1 = C bxor Inv,
                      %% ignore the format marker bit in the first byte
                      C2 = if I =:= 0 -> C1 band 16#7F; true -> C1 end,
                      if (X bsr 56) > 0 ->
                              throw({error,integer_overflow});
                         true ->
                              {I+1, (X bsl 8) bor C2}
                      end
              end,
    {_, N} = lists:foldl(Reducer, {0,0}, binary:bin_to_list(Bin)),
    if (N bsr 63) > 0 ->
            throw({error, integer_overflow});
       Inv =:= 16#FF ->
            %% undo the inversion: ^N == -N-1
            -1 bxor N;
       true ->
            N
    end;
parse_numeric(<<_, _/binary>> = Bin) ->
    %% normal case is an octal number
    parse_octal(Bin).

%% Parses an octal field, ignoring all NUL bytes and spaces in it.
%% A field with no remaining characters is 0. Throws
%% {error, invalid_tar_checksum} when the remaining characters are not a
%% valid octal number.
parse_octal(Bin) when is_binary(Bin) ->
    do_parse_octal(Bin, <<>>).
do_parse_octal(<<>>, <<>>) ->
    0;
do_parse_octal(<<>>, Acc) ->
    case io_lib:fread("~8u", binary:bin_to_list(Acc)) of
        {error, _} -> throw({error, invalid_tar_checksum});
        {ok, [Octal], []} -> Octal;
        {ok, _, _} -> throw({error, invalid_tar_checksum})
    end;
do_parse_octal(<<$\s,Rest/binary>>, Acc) ->
    do_parse_octal(Rest, Acc);
do_parse_octal(<<0, Rest/binary>>, Acc) ->
    do_parse_octal(Rest, Acc);
do_parse_octal(<<C, Rest/binary>>, Acc) ->
    do_parse_octal(Rest, <<Acc/binary, C>>).

%% Parses a string field: the bytes up to (not including) the first NUL
%% byte, or the whole field if it contains none.
parse_string(Bin) when is_binary(Bin) ->
    do_parse_string(Bin, <<>>).
%% Accumulates bytes until the first NUL byte or the end of the field,
%% then decodes the accumulated bytes with unicode:characters_to_list/1.
%% Incomplete input falls back to a plain byte list; invalid input
%% throws {error, {bad_header, invalid_string}}.
do_parse_string(<<>>, Acc) ->
    case unicode:characters_to_list(Acc) of
        Str when is_list(Str) ->
            Str;
        {incomplete, _Str, _Rest} ->
            binary:bin_to_list(Acc);
        {error, _Str, _Rest} ->
            throw({error, {bad_header, invalid_string}})
    end;
do_parse_string(<<0, _/binary>>, Acc) ->
    do_parse_string(<<>>, Acc);
do_parse_string(<<C, Rest/binary>>, Acc) ->
    do_parse_string(Rest, <<Acc/binary, C>>).

%% Converts a raw header block into the result of unpack_format/4.
%% Throws {error, bad_header} for an unknown format,
%% {error, {bad_header, Reason}} when the block cannot be converted,
%% {error, {misaligned_read, Pos}} when the reader position is not a
%% multiple of ?BLOCK_SIZE, and {error, eof} for a short block.
%% An empty block yields `eof`.
convert_header(Bin, #reader{pos=Pos}=Reader)
  when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 ->
    case get_format(Bin) of
        ?FORMAT_UNKNOWN ->
            throw({error, bad_header});
        {ok, Format, V7} ->
            unpack_format(Format, V7, Bin, Reader);
        {error, Reason} ->
            throw({error, {bad_header, Reason}})
    end;
convert_header(Bin, #reader{pos=Pos}) when byte_size(Bin) =:= ?BLOCK_SIZE ->
    %% The reason is a single term so that it is a regular {error, Reason}
    %% pair and matches format_error({misaligned_read, Pos}).
    throw({error, {misaligned_read, Pos}});
convert_header(Bin, _Reader) when byte_size(Bin) =:= 0 ->
    eof;
convert_header(_Bin, _Reader) ->
    throw({error, eof}).

%% Creates a partially-populated header record based
%% on the provided file_info record. If the file is
%% a symlink, then Link is used as the link target.
%% If the file is a directory, a slash is appended to the name.
%% Unsupported file types yield {error, {invalid_file_type, Name, Fi}}.
fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) ->
    BaseHeader = #tar_header{name=Name,
                             mtime=Fi#file_info.mtime,
                             atime=Fi#file_info.atime,
                             ctime=Fi#file_info.ctime,
                             mode=Fi#file_info.mode,
                             uid=Fi#file_info.uid,
                             gid=Fi#file_info.gid,
                             typeflag=?TYPE_REGULAR},
    do_fileinfo_to_header(BaseHeader, Fi, Link).
%% Completes a header according to the file type found in the
%% file_info record:
%%   regular   - the size is recorded
%%   directory - "/" is appended to the name
%%   symlink   - Link becomes the link name
%%   device    - character/block devices record their device numbers
%%   other     - accepted only when the mode marks a FIFO
%% Anything else yields {error, {invalid_file_type, Name, FileInfo}}.
do_fileinfo_to_header(Header, #file_info{type=regular,size=Size}, _Link) ->
    Header#tar_header{size=Size,typeflag=?TYPE_REGULAR};
do_fileinfo_to_header(#tar_header{name=Name}=Header,
                      #file_info{type=directory}, _Link) ->
    DirName = Name++"/",
    Header#tar_header{name=DirName,typeflag=?TYPE_DIR};
do_fileinfo_to_header(Header, #file_info{type=symlink}, Link) ->
    Header#tar_header{typeflag=?TYPE_SYMLINK,linkname=Link};
do_fileinfo_to_header(Header,
                      #file_info{type=device,mode=Mode,
                                 major_device=Major,minor_device=Minor},
                      _Link)
  when (Mode band ?S_IFMT) =:= ?S_IFCHR ->
    Header#tar_header{typeflag=?TYPE_CHAR,
                      devmajor=Major,
                      devminor=Minor};
do_fileinfo_to_header(Header,
                      #file_info{type=device,mode=Mode,
                                 major_device=Major,minor_device=Minor},
                      _Link)
  when (Mode band ?S_IFMT) =:= ?S_IFBLK ->
    Header#tar_header{typeflag=?TYPE_BLOCK,
                      devmajor=Major,
                      devminor=Minor};
do_fileinfo_to_header(Header, #file_info{type=other,mode=Mode}, _Link)
  when (Mode band ?S_IFMT) =:= ?S_FIFO ->
    Header#tar_header{typeflag=?TYPE_FIFO};
do_fileinfo_to_header(Header, FileInfo, _Link) ->
    {error, {invalid_file_type, Header#tar_header.name, FileInfo}}.

%% Returns true when every element of the string or binary is below
%% 16#80.
is_ascii(Str) when is_list(Str) ->
    lists:all(fun (Char) -> Char < 16#80 end, Str);
is_ascii(Bin) when is_binary(Bin) ->
    is_ascii1(Bin).

is_ascii1(<<Byte,_Rest/binary>>) when Byte >= 16#80 ->
    false;
is_ascii1(<<_, Rest/binary>>) ->
    is_ascii1(Rest);
is_ascii1(<<>>) ->
    true.

%% Returns the input as a binary with every element that is 16#80 or
%% above removed.
to_ascii(Str) when is_list(Str) ->
    Kept = case is_ascii(Str) of
               true -> Str;
               false -> [Char || Char <- Str, Char < 16#80]
           end,
    unicode:characters_to_binary(Kept);
to_ascii(Bin) when is_binary(Bin) ->
    to_ascii(Bin, <<>>).
%% Copies the bytes below 16#80 from the binary onto the accumulator,
%% dropping all others.
to_ascii(<<Byte, Tail/binary>>, Acc) when Byte < 16#80 ->
    to_ascii(Tail, <<Acc/binary,Byte>>);
to_ascii(<<_, Tail/binary>>, Acc) ->
    to_ascii(Tail, Acc);
to_ascii(<<>>, Acc) ->
    Acc.

%% Returns true for the entry types listed here (symbolic links, links
%% and directories); unpack_format/4 forces the size of such entries
%% to 0.
is_header_only_type(Typeflag) ->
    lists:member(Typeflag, [?TYPE_SYMLINK, ?TYPE_LINK, ?TYPE_DIR]).

%% Folds Fun over the entries of an archive.
%% The archive is either an already opened reader, which must have been
%% opened for reading, or a name that is opened here and closed again
%% when the fold is done. Returns the result produced by the fold or
%% {error, Reason}.
foldl_read(#reader{access=read}=Reader, Fun, Accu, #read_opts{}=Opts)
  when is_function(Fun,4) ->
    case foldl_read0(Reader, Fun, Accu, Opts) of
        {error, _} = Failure ->
            Failure;
        {ok, Result, _FinalReader} ->
            Result
    end;
foldl_read(#reader{access=Access}, _Fun, _Accu, _Opts) ->
    {error, {read_mode_expected, Access}};
foldl_read(TarName, Fun, Accu, #read_opts{open_mode=OpenMode}=Opts)
  when is_function(Fun,4) ->
    try open(TarName, [read|OpenMode]) of
        {ok, #reader{access=read}=Reader} ->
            try
                foldl_read(Reader, Fun, Accu, Opts)
            after
                _ = close(Reader)
            end;
        {error, _} = Failure ->
            Failure
    catch
        throw:Thrown ->
            Thrown
    end.

%% Runs the fold and converts thrown errors into return values.
%% A thrown {error, {Reason, Format, Args}} is reported through
%% read_verbose/3 and returned as {error, Reason}; any other thrown term
%% is returned as it is.
foldl_read0(Reader, Fun, Accu, Opts) ->
    try foldl_read1(Fun, Accu, Reader, Opts, #{}) of
        {ok,_,_} = Done ->
            Done
    catch
        throw:{error, {Reason, Format, Args}} ->
            read_verbose(Opts, Format, Args),
            {error, Reason};
        throw:Thrown ->
            Thrown
    end.
%% Processes the archive entry by entry.
%% Extended-header entries (?TYPE_X_HEADER, ?TYPE_GNU_LONGNAME and
%% ?TYPE_GNU_LONGLINK) are not passed to Fun; their contents are
%% collected in ExtraHeaders and merged into the header of the next
%% ordinary entry, after which the collection starts over empty.
%% At the end of the archive Fun is called with `eof`.
foldl_read1(Fun, Accu0, Reader0, Opts, ExtraHeaders) ->
    {ok, Reader1} = skip_unread(Reader0),
    case get_header(Reader1) of
        eof ->
            Fun(eof, Reader1, Opts, Accu0);
        {Header, Reader2} ->
            %% Reads the entry data as a name and stores it under PaxKey
            StoreRealName =
                fun(PaxKey) ->
                        {RealName, AfterName} = get_real_name(Reader2),
                        Extra = maps:put(PaxKey, parse_string(RealName),
                                         ExtraHeaders),
                        foldl_read1(Fun, Accu0, AfterName, Opts, Extra)
                end,
            case Header#tar_header.typeflag of
                ?TYPE_X_HEADER ->
                    {PaxHeaders, Reader3} = parse_pax(Reader2),
                    Merged = maps:merge(ExtraHeaders, PaxHeaders),
                    foldl_read1(Fun, Accu0, Reader3, Opts, Merged);
                ?TYPE_GNU_LONGNAME ->
                    StoreRealName(?PAX_PATH);
                ?TYPE_GNU_LONGLINK ->
                    StoreRealName(?PAX_LINKPATH);
                _ ->
                    FullHeader = merge_pax(Header, ExtraHeaders),
                    {ok, NewAccu, Reader3} = Fun(FullHeader, Reader2, Opts, Accu0),
                    foldl_read1(Fun, NewAccu, Reader3, Opts, #{})
            end
    end.

%% Applies all known PAX attributes to the current tar header
-spec merge_pax(tar_header(), #{binary() => binary()}) -> tar_header().
merge_pax(Header, ExtraHeaders) when is_map(ExtraHeaders) ->
    Attributes = maps:to_list(ExtraHeaders),
    do_merge_pax(Header, Attributes).
%% Applies the attributes one at a time, in list order, to the header.
%% Names are converted to strings, ids and the size to integers, and
%% times with parse_pax_time/1. Extended attributes (keys starting with
%% ?PAX_XATTR_STR) and unknown keys leave the header unchanged.
do_merge_pax(Header, []) ->
    Header;
do_merge_pax(Header0, [Attribute|Rest]) ->
    Header1 =
        case Attribute of
            {?PAX_PATH, Path} ->
                Header0#tar_header{name=unicode:characters_to_list(Path)};
            {?PAX_LINKPATH, LinkPath} ->
                Header0#tar_header{linkname=unicode:characters_to_list(LinkPath)};
            {?PAX_GNAME, Gname} ->
                Header0#tar_header{gname=unicode:characters_to_list(Gname)};
            {?PAX_UNAME, Uname} ->
                Header0#tar_header{uname=unicode:characters_to_list(Uname)};
            {?PAX_UID, Uid} ->
                Header0#tar_header{uid=binary_to_integer(Uid)};
            {?PAX_GID, Gid} ->
                Header0#tar_header{gid=binary_to_integer(Gid)};
            {?PAX_ATIME, Atime} ->
                Header0#tar_header{atime=parse_pax_time(Atime)};
            {?PAX_MTIME, Mtime} ->
                Header0#tar_header{mtime=parse_pax_time(Mtime)};
            {?PAX_CTIME, Ctime} ->
                Header0#tar_header{ctime=parse_pax_time(Ctime)};
            {?PAX_SIZE, Size} ->
                Header0#tar_header{size=binary_to_integer(Size)};
            {<<?PAX_XATTR_STR, _Key/binary>>, _Value} ->
                Header0;
            _Ignore ->
                Header0
        end,
    do_merge_pax(Header1, Rest).

%% Returns the time since UNIX epoch as a datetime
-spec parse_pax_time(binary()) -> tar_time().
%% Parses a PAX time value of the form <<"Seconds">> or
%% <<"Seconds.Fraction">> and returns the whole number of seconds.
%% The fraction is right-padded with zeros or truncated to
%% ?MAX_NANO_INT_SIZE digits (nanosecond resolution) before the value is
%% reduced to seconds.
parse_pax_time(Bin) when is_binary(Bin) ->
    TotalNano =
        case binary:split(Bin, [<<$.>>]) of
            [SecondsStr, NanoStr0] ->
                Seconds = binary_to_integer(SecondsStr),
                NanoLen = byte_size(NanoStr0),
                NanoStr1 =
                    if NanoLen < ?MAX_NANO_INT_SIZE ->
                            %% right pad
                            Padding = binary:copy(<<$0>>, ?MAX_NANO_INT_SIZE-NanoLen),
                            <<NanoStr0/binary,Padding/binary>>;
                       NanoLen > ?MAX_NANO_INT_SIZE ->
                            %% right truncate
                            binary_part(NanoStr0, 0, ?MAX_NANO_INT_SIZE);
                       true ->
                            NanoStr0
                    end,
                (Seconds*?BILLION)+binary_to_integer(NanoStr1);
            [SecondsStr] ->
                binary_to_integer(SecondsStr)*?BILLION
        end,
    %% Return the full number of seconds. Header times are written back
    %% as POSIX seconds, so the value must not be reduced to the seconds
    %% component of a {MegaSecs, Secs, MicroSecs} timestamp.
    TotalNano div ?BILLION.

%% Given a regular file reader, reads the whole file and
%% parses all extended attributes it contains.
%% Returns {Attributes, Handle}, where Attributes maps attribute names
%% to values. Throws {error, eof} if the data cannot be read and
%% rethrows any {error, _} from do_read/2.
parse_pax(#reg_file_reader{handle=Handle,num_bytes=0}) ->
    {#{}, Handle};
parse_pax(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
    case do_read(Handle0, NumBytes) of
        {ok, Bytes, Handle1} ->
            do_parse_pax(Handle1, Bytes, #{});
        eof ->
            throw({error, eof});
        {error, _} = Err ->
            throw(Err)
    end.

%% Parses the records in Bin one after another into the Headers map.
%% A later record with the same key replaces an earlier one.
do_parse_pax(Reader, <<>>, Headers) ->
    {Headers, Reader};
do_parse_pax(Reader, Bin, Headers) ->
    {Key, Value, Residual} = parse_pax_record(Bin),
    NewHeaders = maps:put(Key, Value, Headers),
    do_parse_pax(Reader, Residual, NewHeaders).
%% Parse an extended attribute
%% The first line of Bin is taken as one record of the form
%% "Length Name=Value". Returns {Name, Value, Residual}, where Residual
%% is what follows the newline. Throws {error, malformed_pax_record}
%% when Bin has no newline or the record does not have that form.
parse_pax_record(Bin) when is_binary(Bin) ->
    case binary:split(Bin, [<<$\n>>]) of
        [Record, Residual] ->
            case binary:split(Record, [<<$\s>>], [trim_all]) of
                [_Len, Record1] ->
                    case binary:split(Record1, [<<$=>>], [trim_all]) of
                        [AttrName, AttrValue] ->
                            {AttrName, AttrValue, Residual};
                        _Other ->
                            throw({error, malformed_pax_record})
                    end;
                _Other ->
                    throw({error, malformed_pax_record})
            end;
        _Other ->
            throw({error, malformed_pax_record})
    end.

%% Reads the whole data of the current entry, which holds a name.
%% Returns {NameBinary, Reader}. The name is always a binary, also for an
%% empty entry, since callers pass it on to parse_string/1.
%% Throws {error, eof} if the data cannot be read and rethrows any
%% {error, _} from do_read/2.
get_real_name(#reg_file_reader{handle=Handle,num_bytes=0}) ->
    {<<>>, Handle};
get_real_name(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
    case do_read(Handle0, NumBytes) of
        {ok, RealName, Handle1} ->
            {RealName, Handle1};
        eof ->
            throw({error, eof});
        {error, _} = Err ->
            throw(Err)
    end;
get_real_name(#sparse_file_reader{num_bytes=NumBytes}=Reader0) ->
    case do_read(Reader0, NumBytes) of
        {ok, RealName, Reader1} ->
            {RealName, Reader1};
        eof ->
            throw({error, eof});
        {error, _} = Err ->
            throw(Err)
    end.

%% Skip the remaining bytes for the current file entry
%% For a regular file the underlying reader is repositioned past the
%% unread data and the padding that follows it. For a sparse file the
%% remaining data is read and discarded.
skip_file(#reg_file_reader{handle=Handle0,pos=Pos,size=Size}=Reader) ->
    Padding = skip_padding(Size),
    AbsPos = Handle0#reader.pos + (Size-Pos) + Padding,
    case do_position(Handle0, AbsPos) of
        {ok, _, Handle1} ->
            Reader#reg_file_reader{handle=Handle1,num_bytes=0,pos=Size};
        Err ->
            throw(Err)
    end;
skip_file(#sparse_file_reader{pos=Pos,size=Size}=Reader) ->
    case do_read(Reader, Size-Pos) of
        {ok, _, Reader2} ->
            Reader2;
        Err ->
            throw(Err)
    end.

%% Returns the number of padding bytes that follow Size bytes of data
%% to fill up the last block; 0 when Size is a multiple of ?BLOCK_SIZE
%% (which includes a Size of 0).
skip_padding(Size) when (Size rem ?BLOCK_SIZE) =:= 0 ->
    0;
skip_padding(Size) when Size =< ?BLOCK_SIZE ->
    ?BLOCK_SIZE - Size;
skip_padding(Size) ->
    ?BLOCK_SIZE - (Size rem ?BLOCK_SIZE).
%% Moves past whatever is left unread of the current entry.
%% Returns {ok, Reader}, where Reader is the underlying #reader{}.
%% A plain reader that is not at a block boundary is positioned at the
%% next one; a file reader with unread data is skipped with skip_file/1.
skip_unread(#reader{pos=Pos}=Reader0) when (Pos rem ?BLOCK_SIZE) > 0 ->
    Padding = skip_padding(Pos + ?BLOCK_SIZE),
    AbsPos = Pos + Padding,
    case do_position(Reader0, AbsPos) of
        {ok, _, Reader1} ->
            {ok, Reader1};
        Err ->
            throw(Err)
    end;
skip_unread(#reader{}=Reader) ->
    {ok, Reader};
skip_unread(#reg_file_reader{handle=Handle,num_bytes=0}) ->
    skip_unread(Handle);
skip_unread(#reg_file_reader{}=Reader) ->
    #reg_file_reader{handle=Handle} = skip_file(Reader),
    {ok, Handle};
skip_unread(#sparse_file_reader{handle=Handle,num_bytes=0}) ->
    skip_unread(Handle);
skip_unread(#sparse_file_reader{}=Reader) ->
    #sparse_file_reader{handle=Handle} = skip_file(Reader),
    {ok, Handle}.

%% Writes one extracted entry.
%% With output=memory, regular files are returned as {ok, {Name, Bin}}
%% and every other entry type yields `ok`. Otherwise the entry is
%% created below the cwd given in the options and, if it was created,
%% its file info is set from the header.
write_extracted_element(#tar_header{name=Name,typeflag=Type},
                        Bin,
                        #read_opts{output=memory}=Opts) ->
    case typeflag(Type) of
        regular ->
            read_verbose(Opts, "x ~ts~n", [Name]),
            {ok, {Name, Bin}};
        _ ->
            ok
    end;
write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) ->
    Name1 = make_safe_path(Name0, Opts),
    Created =
        case typeflag(Header#tar_header.typeflag) of
            regular ->
                create_regular(Name1, Name0, Bin, Opts);
            directory ->
                read_verbose(Opts, "x ~ts~n", [Name0]),
                create_extracted_dir(Name1, Opts);
            symlink ->
                read_verbose(Opts, "x ~ts~n", [Name0]),
                LinkName = safe_link_name(Header, Opts),
                create_symlink(Name1, LinkName, Opts);
            Device when Device =:= char orelse Device =:= block ->
                %% char/block devices will be created as empty files
                %% and then have their major/minor device set later
                create_regular(Name1, Name0, <<>>, Opts);
            fifo ->
                %% fifo devices will be created as empty files
                create_regular(Name1, Name0, <<>>, Opts);
            Other -> % Ignore.
                read_verbose(Opts, "x ~ts - unsupported type ~p~n",
                             [Name0, Other]),
                not_written
        end,
    case Created of
        ok -> set_extracted_file_info(Name1, Header);
        not_written -> ok
    end.

%% Turns the name of an archive member into an absolute path below Cwd.
%% Leading "/" characters are dropped. Throws
%% {error, {Path, unsafe_path}} when filelib:safe_relative_path/2
%% reports the path as unsafe.
make_safe_path([$/|Path], Opts) ->
    make_safe_path(Path, Opts);
make_safe_path(Path0, #read_opts{cwd=Cwd}) ->
    case filelib:safe_relative_path(Path0, Cwd) of
        unsafe -> throw({error,{Path0,unsafe_path}});
        Path -> filename:absname(Path, Cwd)
    end.

%% Returns the link target of the header as a safe relative path, or
%% throws {error, {LinkName, unsafe_symlink}}.
safe_link_name(#tar_header{linkname=Path0},#read_opts{cwd=Cwd} ) ->
    case filelib:safe_relative_path(Path0, Cwd) of
        unsafe -> throw({error,{Path0,unsafe_symlink}});
        Path -> Path
    end.

%% Writes Bin to the file Name and reports the outcome under the name
%% the entry has in the archive. Returns ok or not_written.
create_regular(Name, NameInArchive, Bin, Opts) ->
    case write_extracted_file(Name, Bin, Opts) of
        not_written ->
            read_verbose(Opts, "x ~ts - exists, not created~n", [NameInArchive]),
            not_written;
        Ok ->
            read_verbose(Opts, "x ~ts~n", [NameInArchive]),
            Ok
    end.

%% Creates the directory Name, creating missing parent directories if
%% needed. Returns ok, or not_written when the directory exists already
%% or directories are not supported. Other failures are thrown as
%% {error, Reason}.
create_extracted_dir(Name, _Opts) ->
    case file:make_dir(Name) of
        ok -> ok;
        {error,enotsup} -> not_written;
        {error,eexist} -> not_written;
        {error,enoent} ->
            %% A failure to create the parents is thrown like any other
            %% failure, as write_file/2 does.
            case make_dirs(Name, dir) of
                ok -> ok;
                {error,Reason} -> throw({error, Reason})
            end;
        {error,Reason} -> throw({error, Reason})
    end.

%% Creates the symbolic link Name pointing at Linkname, creating missing
%% parent directories if needed. Returns ok, or not_written when Name
%% exists already or symbolic links are not supported. Other failures
%% are thrown as {error, Reason}.
create_symlink(Name, Linkname, Opts) ->
    case file:make_symlink(Linkname, Name) of
        ok -> ok;
        {error,enoent} ->
            case make_dirs(Name, file) of
                ok ->
                    create_symlink(Name, Linkname, Opts);
                {error,Reason} ->
                    throw({error, Reason})
            end;
        {error,eexist} -> not_written;
        {error,enotsup} ->
            read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]),
            not_written;
        {error,Reason} -> throw({error, Reason})
    end.
%% Writes Bin to the file Name unless the keep_old_files option is set
%% and file:read_file_info/1 succeeds for Name. Returns ok or
%% not_written.
write_extracted_file(Name, Bin, Opts) ->
    Write =
        case Opts#read_opts.keep_old_files of
            true ->
                case file:read_file_info(Name) of
                    {ok, _} -> false;
                    _ -> true
                end;
            false -> true
        end,
    case Write of
        true -> write_file(Name, Bin);
        false -> not_written
    end.

%% Writes Bin to the file Name, creating missing parent directories if
%% needed. Failures are thrown as {error, Reason}.
write_file(Name, Bin) ->
    case file:write_file(Name, Bin) of
        ok -> ok;
        {error,enoent} ->
            case make_dirs(Name, file) of
                ok ->
                    write_file(Name, Bin);
                {error,Reason} ->
                    throw({error, Reason})
            end;
        {error,Reason} ->
            throw({error, Reason})
    end.

%% Sets mode and modification time of an extracted file from its header.
%% Nothing is done for symbolic links and links; character and block
%% devices are handled by set_device_info/2.
set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_SYMLINK}) -> ok;
set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_LINK}) -> ok;
set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_CHAR}=Header) ->
    set_device_info(Name, Header);
set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_BLOCK}=Header) ->
    set_device_info(Name, Header);
set_extracted_file_info(Name, #tar_header{mtime=Mtime,mode=Mode}) ->
    Info = #file_info{mode=Mode, mtime=Mtime},
    file:write_file_info(Name, Info, [{time, posix}]).

%% Sets mode, modification time and device numbers of an extracted
%% device file from its header.
set_device_info(Name, #tar_header{}=Header) ->
    Mtime = Header#tar_header.mtime,
    Mode = Header#tar_header.mode,
    Devmajor = Header#tar_header.devmajor,
    Devminor = Header#tar_header.devminor,
    Info = #file_info{
              mode=Mode,
              mtime=Mtime,
              major_device=Devmajor,
              minor_device=Devminor
             },
    %% The header time is handled as POSIX seconds, exactly as in
    %% set_extracted_file_info/2, so the time format has to be given.
    file:write_file_info(Name, Info, [{time, posix}]).

%% Makes all directories leading up to the file.
%% For `dir`, Name itself is created as well.

make_dirs(Name, file) ->
    filelib:ensure_dir(Name);
make_dirs(Name, dir) ->
    filelib:ensure_dir(filename:join(Name,"*")).

%% Prints the message if the verbose option is given (for reading).
read_verbose(#read_opts{verbose=true}, Format, Args) ->
    io:format(Format, Args);
read_verbose(_, _, _) ->
    ok.
%% Prints the message if the verbose option is given (for adding).
add_verbose(#add_opts{verbose=true}, Format, Args) ->
    io:format(Format, Args);
add_verbose(_, _, _) ->
    ok.

%%%%%%%%%%%%%%%%%%
%% I/O primitives
%%%%%%%%%%%%%%%%%%

%% Writes Data through the reader's I/O fun and records the resulting
%% position in the reader. Returns {ok, Reader} or {error, Reason}.
do_write(#reader{handle=Handle,func=Fun}=Reader0, Data)
  when is_function(Fun,2) ->
    case Fun(write,{Handle,Data}) of
        ok ->
            {ok, Pos, Reader1} = do_position(Reader0, {cur,0}),
            {ok, Reader1#reader{pos=Pos}};
        {error, _} = Err ->
            Err
    end.

%% Copies the contents of the file Source into the archive in chunks of
%% chunk_size bytes (65536 when chunk_size is 0).
%% Returns {ok, BytesCopied, Reader}. Failures to open, read or write
%% are thrown. The source file is closed in all cases, including when
%% the copy ends with an exception.
do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=0}=Opts)
  when is_function(Fun, 2) ->
    do_copy(Reader, Source, Opts#add_opts{chunk_size=65536});
do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=ChunkSize})
  when is_function(Fun, 2) ->
    case file:open(Source, [read, binary]) of
        {ok, SourceFd} ->
            try copy_chunked(Reader, SourceFd, ChunkSize, 0) of
                {ok, _Copied, _Reader2} = Ok ->
                    Ok;
                CopyErr ->
                    throw(CopyErr)
            after
                _ = file:close(SourceFd)
            end;
        OpenErr ->
            throw(OpenErr)
    end.

%% Reads Source chunk by chunk and writes each chunk to the archive.
%% Returns {ok, BytesCopied, Reader}, or the first failing result of
%% file:read/2 or do_write/2.
copy_chunked(#reader{}=Reader, Source, ChunkSize, Copied) ->
    case file:read(Source, ChunkSize) of
        {ok, Bin} ->
            case do_write(Reader, Bin) of
                {ok, Reader2} ->
                    copy_chunked(Reader2, Source, ChunkSize, Copied+byte_size(Bin));
                {error, _} = WriteErr ->
                    WriteErr
            end;
        eof ->
            {ok, Copied, Reader};
        Other ->
            Other
    end.


%% Positions the reader through its I/O fun.
%% Returns {ok, NewPos, Reader}, where NewPos is what the I/O fun
%% reported for the request and the reader holds the absolute position,
%% or whatever else the I/O fun returned.
do_position(#reader{handle=Handle,func=Fun}=Reader, Pos)
  when is_function(Fun,2)->
    case Fun(position, {Handle,Pos}) of
        {ok, NewPos} ->
            %% since Pos may not always be an absolute seek,
            %% make sure we update the reader with the new absolute position
            {ok, AbsPos} = Fun(position, {Handle, {cur, 0}}),
            {ok, NewPos, Reader#reader{pos=AbsPos}};
        Other ->
            Other
    end.
%% Reads up to Len bytes.
%% A regular file reader never reads past the end of its entry and
%% keeps track of its position in it. A sparse file reader reads through
%% do_sparse_read/2. A plain reader reads through its I/O fun; data
%% returned as a list is converted to a binary and the position is
%% advanced by the number of bytes received.
%% Returns {ok, Binary, Reader} or whatever else the underlying read
%% returned.
do_read(#reg_file_reader{handle=Handle,pos=Pos,size=Size}=Reader, Len) ->
    Remaining = Size - Pos,
    ActualLen = case Remaining - Len < 0 of
                    true -> Remaining;
                    false -> Len
                end,
    case do_read(Handle, ActualLen) of
        {ok, Bin, Handle2} ->
            NewPos = Pos + ActualLen,
            {ok, Bin, Reader#reg_file_reader{
                        handle=Handle2,
                        pos=NewPos,
                        num_bytes=Size - NewPos}};
        Other ->
            Other
    end;
do_read(#sparse_file_reader{}=Reader, Len) ->
    do_sparse_read(Reader, Len);
do_read(#reader{pos=Pos,handle=Handle,func=Fun}=Reader, Len)
  when is_function(Fun,2)->
    %% Always convert to binary internally
    Advance = fun(Bin) ->
                      {ok, Bin, Reader#reader{pos=Pos+byte_size(Bin)}}
              end,
    case Fun(read2,{Handle,Len}) of
        {ok, List} when is_list(List) ->
            Advance(list_to_binary(List));
        {ok, Bin} when is_binary(Bin) ->
            Advance(Bin);
        Other ->
            Other
    end.


%% Reads up to Len bytes of a sparse file, starting from an empty result.
do_sparse_read(Reader, Len) ->
    Empty = <<>>,
    do_sparse_read(Reader, Len, Empty).
%% Reads up to Len bytes of the expanded contents of a sparse file,
%% appending them to Acc. Holes between data fragments are returned as
%% zero bytes; data fragments are read from the wrapped handle.
%% Returns {ok, Binary, Reader}, or whatever the wrapped read returned if
%% it was not successful.
do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{num_bytes=0}|Entries]
                                  }=Reader0, Len, Acc) ->
    %% skip all empty fragments
    Reader1 = Reader0#sparse_file_reader{sparse_map=Entries},
    do_sparse_read(Reader1, Len, Acc);
do_sparse_read(#sparse_file_reader{}=Reader, 0, Acc) ->
    %% The request is satisfied. This clause has to precede the hole
    %% clauses: with Len = 0 they produce an empty binary without
    %% advancing pos, so the trailing-hole clause would recurse forever.
    {ok, Acc, Reader};
do_sparse_read(#sparse_file_reader{sparse_map=[],
                                   pos=Pos,size=Size}=Reader0, Len, Acc)
  when Pos < Size ->
    %% if there are no more fragments, it is possible that there is one last sparse hole
    %% this behaviour matches the BSD tar utility
    %% however, GNU tar stops returning data even if we haven't reached the end
    {ok, Bin, Reader1} = read_sparse_hole(Reader0, Size, Len),
    do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
do_sparse_read(#sparse_file_reader{sparse_map=[]}=Reader, _Len, Acc) ->
    {ok, Acc, Reader};
do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{offset=Offset}|_],
                                   pos=Pos}=Reader0, Len, Acc)
  when Pos < Offset ->
    %% we're in a hole in front of the next fragment: emit zeros up to
    %% Offset, but never more than the Len bytes still requested
    {ok, Bin, Reader1} = read_sparse_hole(Reader0, Offset, Len),
    do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
do_sparse_read(#sparse_file_reader{sparse_map=[Entry|Entries],
                                   pos=Pos}=Reader0, Len, Acc) ->
    %% we're in a data fragment, so read from it
    %% end offset of fragment
    EndPos = Entry#sparse_entry.offset + Entry#sparse_entry.num_bytes,
    %% bytes left in fragment
    NumBytes = EndPos - Pos,
    ActualLen = if Len > NumBytes -> NumBytes; true -> Len end,
    case do_read(Reader0#sparse_file_reader.handle, ActualLen) of
        {ok, Bin, Handle} ->
            BytesRead = byte_size(Bin),
            ActualEndPos = Pos+BytesRead,
            %% drop the fragment from the map once it is fully consumed
            Reader1 = if ActualEndPos =:= EndPos ->
                              Reader0#sparse_file_reader{sparse_map=Entries};
                         true ->
                              Reader0
                      end,
            Size = Reader1#sparse_file_reader.size,
            NumBytes2 = Size - ActualEndPos,
            Reader2 = Reader1#sparse_file_reader{
                        handle=Handle,
                        pos=ActualEndPos,
                        num_bytes=NumBytes2},
            do_sparse_read(Reader2, Len-BytesRead, <<Acc/binary,Bin/binary>>);
        Other ->
            Other
    end.

%% Reads a sparse hole ending at Offset.
%% Produces at most Len zero bytes (fewer if the hole ends first) and
%% advances the reader's pos by the number of bytes produced.
read_sparse_hole(#sparse_file_reader{pos=Pos}=Reader, Offset, Len) ->
    N = Offset - Pos,
    N2 = if N > Len ->
                 Len;
            true ->
                 N
         end,
    Bin = <<0:N2/unit:8>>,
    NumBytes = Reader#sparse_file_reader.size - (Pos+N2),
    {ok, Bin, Reader#sparse_file_reader{
                num_bytes=NumBytes,
                pos=Pos+N2}}.

%% Closes the underlying handle through the reader's I/O fun.
-spec do_close(tar_descriptor()) -> ok | {error, term()}.
do_close(#reader{handle=Handle,func=Fun}) when is_function(Fun,2) ->
    Fun(close,Handle).

%%%%%%%%%%%%%%%%%%
%% Option parsing
%%%%%%%%%%%%%%%%%%

%% Parses the option list for extraction, starting from the defaults.
extract_opts(List) ->
    extract_opts(List, default_options()).

%% Parses the option list for listing an archive, starting from the
%% defaults. Only the options understood by read_opts/2 are honoured.
table_opts(List) ->
    read_opts(List, default_options()).

%% Default read options: everything as declared in the record, with cwd
%% set to the current working directory.
default_options() ->
    {ok, Cwd} = file:get_cwd(),
    #read_opts{cwd=Cwd}.
%% Folds a list of extraction options into a #read_opts{} record.
%% keep_old_files, {cwd, Dir}, {files, Names} and memory are handled here;
%% every other element is handed to read_opts/2 one at a time, which
%% applies compressed, cooked and verbose and ignores anything else.
extract_opts([], Acc) ->
    Acc;
extract_opts([keep_old_files|Tail], Acc) ->
    extract_opts(Tail, Acc#read_opts{keep_old_files=true});
extract_opts([{cwd, Dir}|Tail], Acc) ->
    extract_opts(Tail, Acc#read_opts{cwd=Dir});
extract_opts([{files, Names}|Tail], Acc) ->
    %% the requested file names are kept as an ordered set
    Wanted = ordsets:from_list(Names),
    extract_opts(Tail, Acc#read_opts{files=Wanted});
extract_opts([memory|Tail], Acc) ->
    extract_opts(Tail, Acc#read_opts{output=memory});
extract_opts([Shared|Tail], Acc) ->
    extract_opts(Tail, read_opts([Shared], Acc)).

%% Folds the options shared by listing and extraction into a #read_opts{}
%% record: compressed and cooked are prepended to open_mode, verbose sets
%% the verbose flag. Unrecognised elements are silently skipped.
read_opts([], Acc) ->
    Acc;
read_opts([Mode|Tail], #read_opts{open_mode=Modes}=Acc)
  when Mode =:= compressed; Mode =:= cooked ->
    read_opts(Tail, Acc#read_opts{open_mode=[Mode|Modes]});
read_opts([verbose|Tail], Acc) ->
    read_opts(Tail, Acc#read_opts{verbose=true});
read_opts([_Ignored|Tail], Acc) ->
    read_opts(Tail, Acc).