1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 1997-2020. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20%% This module implements extraction/creation of tar archives. 21%% It supports reading most common tar formats, namely V7, STAR, 22%% USTAR, GNU, BSD/libarchive, and PAX. It produces archives in USTAR 23%% format, unless it must use PAX headers, in which case it produces PAX 24%% format. 25%% 26%% The following references where used: 27%% http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 28%% http://www.gnu.org/software/tar/manual/html_node/Standard.html 29%% http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html 30-module(erl_tar). 31 32-export([init/3, 33 create/2, create/3, 34 extract/1, extract/2, 35 table/1, table/2, t/1, tt/1, 36 open/2, close/1, 37 add/3, add/4, 38 format_error/1]). 39 40-include_lib("kernel/include/file.hrl"). 41-include_lib("erl_tar.hrl"). 42 43%% Converts the short error reason to a descriptive string. 44-spec format_error(term()) -> string(). 45format_error(invalid_tar_checksum) -> 46 "Checksum failed"; 47format_error(bad_header) -> 48 "Unrecognized tar header format"; 49format_error({bad_header, Reason}) -> 50 lists:flatten(io_lib:format("Unrecognized tar header format: ~p", [Reason])); 51format_error({invalid_header, negative_size}) -> 52 "Invalid header: negative size"; 53format_error(invalid_sparse_header_size) -> 54 "Invalid sparse header: negative size"; 55format_error(invalid_sparse_map_entry) -> 56 "Invalid sparse map entry"; 57format_error({invalid_sparse_map_entry, Reason}) -> 58 lists:flatten(io_lib:format("Invalid sparse map entry: ~p", [Reason])); 59format_error(invalid_end_of_archive) -> 60 "Invalid end of archive"; 61format_error(eof) -> 62 "Unexpected end of file"; 63format_error(integer_overflow) -> 64 "Failed to parse numeric: integer overflow"; 65format_error({misaligned_read, Pos}) -> 66 lists:flatten(io_lib:format("Read a block which was misaligned: block_size=~p pos=~p", 67 [?BLOCK_SIZE, Pos])); 68format_error(invalid_gnu_1_0_sparsemap) -> 69 "Invalid GNU sparse map (version 1.0)"; 70format_error({invalid_gnu_0_1_sparsemap, Format}) -> 71 lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format])); 72format_error(unsafe_path) -> 73 "The path points above the current working directory"; 74format_error({Name,Reason}) -> 75 lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)])); 76format_error(Atom) when is_atom(Atom) -> 77 file:format_error(Atom); 78format_error(Term) -> 79 lists:flatten(io_lib:format("~tp", [Term])). 80 81%% Initializes a new reader given a custom file handle and I/O wrappers 82-spec init(UserData :: user_data(), write | read, file_op()) -> 83 {ok, tar_descriptor()} | {error, badarg}. 84init(UserData, AccessMode, Fun) when is_function(Fun, 2) -> 85 Reader = #reader{handle=UserData,access=AccessMode,func=Fun}, 86 {ok, Pos, Reader2} = do_position(Reader, {cur, 0}), 87 {ok, Reader2#reader{pos=Pos}}; 88init(_UserData, _AccessMode, _Fun) -> 89 {error, badarg}. 90 91%%%================================================================ 92%% Extracts all files from the tar file Name. 93-spec extract(Open :: open_type()) -> ok | {error, term()}. 94extract(Name) -> 95 extract(Name, []). 96 97%% Extracts (all) files from the tar file Name. 98%% Options accepted: 99%% - cooked: Opens the tar file without mode `raw` 100%% - compressed: Uncompresses the tar file when reading 101%% - memory: Returns the tar contents as a list of tuples {Name, Bin} 102%% - keep_old_files: Extracted files will not overwrite the destination 103%% - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract 104%% - verbose: Prints verbose information about the extraction, 105%% - {cwd, AbsoluteDir}: Sets the current working directory for the extraction 106-spec extract(Open :: open_type(), [extract_opt()]) -> 107 {ok, [{string(), binary()}]} | 108 {error, term()} | 109 ok. 110extract({binary, Bin}, Opts) when is_list(Opts) -> 111 do_extract({binary, Bin}, Opts); 112extract({file, Fd}, Opts) when is_list(Opts) -> 113 do_extract({file, Fd}, Opts); 114extract(#reader{}=Reader, Opts) when is_list(Opts) -> 115 do_extract(Reader, Opts); 116extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) -> 117 do_extract(Name, Opts). 118 119do_extract(Handle, Opts) when is_list(Opts) -> 120 Opts2 = extract_opts(Opts), 121 Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end, 122 foldl_read(Handle, fun extract1/4, Acc, Opts2). 123 124extract1(eof, Reader, _, Acc) when is_list(Acc) -> 125 {ok, {ok, lists:reverse(Acc)}, Reader}; 126extract1(eof, Reader, _, leading_slash) -> 127 error_logger:info_msg("erl_tar: removed leading '/' from member names\n"), 128 {ok, ok, Reader}; 129extract1(eof, Reader, _, Acc) -> 130 {ok, Acc, Reader}; 131extract1(#tar_header{name=Name,size=Size}=Header, Reader0, Opts, Acc0) -> 132 case check_extract(Name, Opts) of 133 true -> 134 case do_read(Reader0, Size) of 135 {ok, Bin, Reader1} -> 136 Acc = extract2(Header, Bin, Opts, Acc0), 137 {ok, Acc, Reader1}; 138 {error, _} = Err -> 139 throw(Err) 140 end; 141 false -> 142 {ok, Acc0, skip_file(Reader0)} 143 end. 144 145extract2(Header, Bin, Opts, Acc) -> 146 case write_extracted_element(Header, Bin, Opts) of 147 ok -> 148 case Header of 149 #tar_header{name="/"++_} -> 150 leading_slash; 151 #tar_header{} -> 152 Acc 153 end; 154 {ok, NameBin} when is_list(Acc) -> 155 [NameBin | Acc]; 156 {error, _} = Err -> 157 throw(Err) 158 end. 159 160%% Checks if the file Name should be extracted. 161check_extract(_, #read_opts{files=all}) -> 162 true; 163check_extract(Name, #read_opts{files=Files}) -> 164 ordsets:is_element(Name, Files). 165 166%%%================================================================ 167%% The following table functions produce a list of information about 168%% the files contained in the archive. 169-type typeflag() :: regular | link | symlink | 170 char | block | directory | 171 fifo | reserved | unknown. 172-type mode() :: non_neg_integer(). 173-type uid() :: non_neg_integer(). 174-type gid() :: non_neg_integer(). 175 176-type tar_entry() :: {Name :: name_in_archive(), 177 Type :: typeflag(), 178 Size :: non_neg_integer(), 179 MTime :: tar_time(), 180 Mode :: mode(), 181 Uid :: uid(), 182 Gid :: gid()}. 183 184%% Returns a list of names of the files in the tar file Name. 185-spec table(Open :: open_type()) -> {ok, [name_in_archive()]} | {error, term()}. 186table(Name) -> 187 table(Name, []). 188 189%% Returns a list of names of the files in the tar file Name. 190%% Options accepted: compressed, verbose, cooked. 191-spec table(Open :: open_type(), [compressed | verbose | cooked]) -> 192 {ok, [name_in_archive() | tar_entry()]} | {error, term()}. 193table(Name, Opts) when is_list(Opts) -> 194 foldl_read(Name, fun table1/4, [], table_opts(Opts)). 195 196table1(eof, Reader, _, Result) -> 197 {ok, {ok, lists:reverse(Result)}, Reader}; 198table1(#tar_header{}=Header, Reader, #read_opts{verbose=Verbose}, Result) -> 199 Attrs = table1_attrs(Header, Verbose), 200 Reader2 = skip_file(Reader), 201 {ok, [Attrs|Result], Reader2}. 202 203%% Extracts attributes relevant to table1's output 204table1_attrs(#tar_header{typeflag=Typeflag,mode=Mode}=Header, true) -> 205 Type = typeflag(Typeflag), 206 Name = Header#tar_header.name, 207 Mtime = Header#tar_header.mtime, 208 Uid = Header#tar_header.uid, 209 Gid = Header#tar_header.gid, 210 Size = Header#tar_header.size, 211 {Name, Type, Size, Mtime, Mode, Uid, Gid}; 212table1_attrs(#tar_header{name=Name}, _Verbose) -> 213 Name. 214 215typeflag(?TYPE_REGULAR) -> regular; 216typeflag(?TYPE_REGULAR_A) -> regular; 217typeflag(?TYPE_GNU_SPARSE) -> regular; 218typeflag(?TYPE_CONT) -> regular; 219typeflag(?TYPE_LINK) -> link; 220typeflag(?TYPE_SYMLINK) -> symlink; 221typeflag(?TYPE_CHAR) -> char; 222typeflag(?TYPE_BLOCK) -> block; 223typeflag(?TYPE_DIR) -> directory; 224typeflag(?TYPE_FIFO) -> fifo; 225typeflag(_) -> unknown. 226 227%%%================================================================ 228%% Comments for printing the contents of a tape archive, 229%% meant to be invoked from the shell. 230 231%% Prints each filename in the archive 232-spec t(file:filename()) -> ok | {error, term()}. 233t(Name) when is_list(Name); is_binary(Name) -> 234 case table(Name) of 235 {ok, List} -> 236 lists:foreach(fun(N) -> ok = io:format("~ts\n", [N]) end, List); 237 Error -> 238 Error 239 end. 240 241%% Prints verbose information about each file in the archive 242-spec tt(open_type()) -> ok | {error, term()}. 243tt(Name) -> 244 case table(Name, [verbose]) of 245 {ok, List} -> 246 lists:foreach(fun print_header/1, List); 247 Error -> 248 Error 249 end. 250 251%% Used by tt/1 to print a tar_entry tuple 252-spec print_header(tar_entry()) -> ok. 253print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) -> 254 io:format("~s~s ~4w/~-4w ~7w ~s ~s\n", 255 [type_to_string(Type), mode_to_string(Mode), 256 Uid, Gid, Size, time_to_string(Mtime), Name]). 257 258type_to_string(regular) -> "-"; 259type_to_string(directory) -> "d"; 260type_to_string(link) -> "l"; 261type_to_string(symlink) -> "s"; 262type_to_string(char) -> "c"; 263type_to_string(block) -> "b"; 264type_to_string(fifo) -> "f"; 265type_to_string(unknown) -> "?". 266 267%% Converts a numeric mode to its human-readable representation 268mode_to_string(Mode) -> 269 mode_to_string(Mode, "xwrxwrxwr", []). 270mode_to_string(Mode, [C|T], Acc) when Mode band 1 =:= 1 -> 271 mode_to_string(Mode bsr 1, T, [C|Acc]); 272mode_to_string(Mode, [_|T], Acc) -> 273 mode_to_string(Mode bsr 1, T, [$-|Acc]); 274mode_to_string(_, [], Acc) -> 275 Acc. 276 277%% Converts a tar_time() (POSIX time) to a readable string 278time_to_string(Secs0) -> 279 Epoch = calendar:datetime_to_gregorian_seconds(?EPOCH), 280 Secs = Epoch + Secs0, 281 DateTime0 = calendar:gregorian_seconds_to_datetime(Secs), 282 DateTime = calendar:universal_time_to_local_time(DateTime0), 283 {{Y, Mon, Day}, {H, Min, _}} = DateTime, 284 io_lib:format("~s ~2w ~s:~s ~w", [month(Mon), Day, two_d(H), two_d(Min), Y]). 285 286two_d(N) -> 287 tl(integer_to_list(N + 100)). 288 289month(1) -> "Jan"; 290month(2) -> "Feb"; 291month(3) -> "Mar"; 292month(4) -> "Apr"; 293month(5) -> "May"; 294month(6) -> "Jun"; 295month(7) -> "Jul"; 296month(8) -> "Aug"; 297month(9) -> "Sep"; 298month(10) -> "Oct"; 299month(11) -> "Nov"; 300month(12) -> "Dec". 301 302%%%================================================================ 303%% The open function with friends is to keep the file and binary api of this module 304-type open_type() :: file:filename_all() 305 | {binary, binary()} 306 | {file, file:io_device()}. 307-spec open(Open :: open_type(), [write | compressed | cooked]) -> 308 {ok, tar_descriptor()} | {error, term()}. 309open({binary, Bin}, Mode) when is_binary(Bin) -> 310 do_open({binary, Bin}, Mode); 311open({file, Fd}, Mode) -> 312 do_open({file, Fd}, Mode); 313open(Name, Mode) when is_list(Name); is_binary(Name) -> 314 do_open(Name, Mode). 315 316do_open(Name, Mode) when is_list(Mode) -> 317 case open_mode(Mode) of 318 {ok, Access, Raw, Opts} -> 319 open1(Name, Access, Raw, Opts); 320 {error, Reason} -> 321 {error, {Name, Reason}} 322 end. 323 324open1({binary,Bin}, read, _Raw, Opts) when is_binary(Bin) -> 325 case file:open(Bin, [ram,binary,read]) of 326 {ok,File} -> 327 _ = [ram_file:uncompress(File) || lists:member(compressed, Opts)], 328 {ok, #reader{handle=File,access=read,func=fun file_op/2}}; 329 Error -> 330 Error 331 end; 332open1({file, Fd}, read, _Raw, _Opts) -> 333 Reader = #reader{handle=Fd,access=read,func=fun file_op/2}, 334 case do_position(Reader, {cur, 0}) of 335 {ok, Pos, Reader2} -> 336 {ok, Reader2#reader{pos=Pos}}; 337 {error, _} = Err -> 338 Err 339 end; 340open1(Name, Access, Raw, Opts) when is_list(Name) or is_binary(Name) -> 341 case file:open(Name, Raw ++ [binary, Access|Opts]) of 342 {ok, File} -> 343 {ok, #reader{handle=File,access=Access,func=fun file_op/2}}; 344 {error, Reason} -> 345 {error, {Name, Reason}} 346 end. 347 348open_mode(Mode) -> 349 open_mode(Mode, false, [raw], []). 350 351open_mode(read, _, Raw, _) -> 352 {ok, read, Raw, []}; 353open_mode(write, _, Raw, _) -> 354 {ok, write, Raw, []}; 355open_mode([read|Rest], false, Raw, Opts) -> 356 open_mode(Rest, read, Raw, Opts); 357open_mode([write|Rest], false, Raw, Opts) -> 358 open_mode(Rest, write, Raw, Opts); 359open_mode([compressed|Rest], Access, Raw, Opts) -> 360 open_mode(Rest, Access, Raw, [compressed,read_ahead|Opts]); 361open_mode([cooked|Rest], Access, _Raw, Opts) -> 362 open_mode(Rest, Access, [], Opts); 363open_mode([], Access, Raw, Opts) -> 364 {ok, Access, Raw, Opts}; 365open_mode(_, _, _, _) -> 366 {error, einval}. 367 368file_op(write, {Fd, Data}) -> 369 file:write(Fd, Data); 370file_op(position, {Fd, Pos}) -> 371 file:position(Fd, Pos); 372file_op(read2, {Fd, Size}) -> 373 file:read(Fd, Size); 374file_op(close, Fd) -> 375 file:close(Fd). 376 377%% Closes a tar archive. 378-spec close(TarDescriptor :: tar_descriptor()) -> ok | {error, term()}. 379close(#reader{access=read}=Reader) -> 380 ok = do_close(Reader); 381close(#reader{access=write}=Reader) -> 382 {ok, Reader2} = pad_file(Reader), 383 ok = do_close(Reader2), 384 ok; 385close(_) -> 386 {error, einval}. 387 388pad_file(#reader{pos=Pos}=Reader) -> 389 %% There must be at least two zero blocks at the end. 390 PadCurrent = skip_padding(Pos+?BLOCK_SIZE), 391 Padding = <<0:PadCurrent/unit:8>>, 392 do_write(Reader, [Padding, ?ZERO_BLOCK, ?ZERO_BLOCK]). 393 394 395%%%================================================================ 396%% Creation/modification of tar archives 397 398%% Creates a tar file Name containing the given files. 399-spec create(file:filename_all(), filelist()) -> ok | {error, {string(), term()}}. 400create(Name, FileList) when is_list(Name); is_binary(Name) -> 401 create(Name, FileList, []). 402 403%% Creates a tar archive Name containing the given files. 404%% Accepted options: verbose, compressed, cooked 405-spec create(file:filename_all(), filelist(), [create_opt()]) -> 406 ok | {error, term()} | {error, {string(), term()}}. 407create(Name, FileList, Options) when is_list(Name); is_binary(Name) -> 408 Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked) 409 end, Options), 410 case open(Name, [write|Mode]) of 411 {ok, TarFile} -> 412 do_create(TarFile, FileList, Options); 413 {error, _} = Err -> 414 Err 415 end. 416 417do_create(TarFile, [], _Opts) -> 418 close(TarFile); 419do_create(TarFile, [{NameInArchive, NameOrBin}|Rest], Opts) -> 420 case add(TarFile, NameOrBin, NameInArchive, Opts) of 421 ok -> 422 do_create(TarFile, Rest, Opts); 423 {error, _} = Err -> 424 _ = close(TarFile), 425 Err 426 end; 427do_create(TarFile, [Name|Rest], Opts) -> 428 case add(TarFile, Name, Name, Opts) of 429 ok -> 430 do_create(TarFile, Rest, Opts); 431 {error, _} = Err -> 432 _ = close(TarFile), 433 Err 434 end. 435 436%% Adds a file to a tape archive. 437-type add_type() :: name_in_archive() 438 | {name_in_archive(), file:filename_all()}. 439-spec add(TarDescriptor, AddType, Options) -> ok | {error, term()} when 440 TarDescriptor :: tar_descriptor(), 441 AddType :: add_type(), 442 Options :: [add_opt()]. 443add(Reader, {NameInArchive, Name}, Opts) 444 when is_list(NameInArchive), is_list(Name) -> 445 do_add(Reader, Name, NameInArchive, Opts); 446add(Reader, {NameInArchive, Bin}, Opts) 447 when is_list(NameInArchive), is_binary(Bin) -> 448 do_add(Reader, Bin, NameInArchive, Opts); 449add(Reader, Name, Opts) when is_list(Name) -> 450 do_add(Reader, Name, Name, Opts). 451 452-spec add(TarDescriptor, Filename, NameInArchive, Options) -> 453 ok | {error, term()} when 454 TarDescriptor :: tar_descriptor(), 455 Filename :: file:filename_all(), 456 NameInArchive :: name_in_archive(), 457 Options :: [add_opt()]. 458add(Reader, NameOrBin, NameInArchive, Options) 459 when is_list(NameOrBin); is_binary(NameOrBin), 460 is_list(NameInArchive), is_list(Options) -> 461 do_add(Reader, NameOrBin, NameInArchive, Options). 462 463do_add(#reader{access=write}=Reader, Name, NameInArchive, Options) 464 when is_list(NameInArchive), is_list(Options) -> 465 RF = apply_file_info_opts_fun(Options, read_link_info), 466 Opts = #add_opts{read_info=RF}, 467 add1(Reader, Name, NameInArchive, add_opts(Options, Options, Opts)); 468do_add(#reader{access=read},_,_,_) -> 469 {error, eacces}; 470do_add(Reader,_,_,_) -> 471 {error, {badarg, Reader}}. 472 473add_opts([dereference|T], AllOptions, Opts) -> 474 RF = apply_file_info_opts_fun(AllOptions, read_file_info), 475 add_opts(T, AllOptions, Opts#add_opts{read_info=RF}); 476add_opts([verbose|T], AllOptions, Opts) -> 477 add_opts(T, AllOptions, Opts#add_opts{verbose=true}); 478add_opts([{chunks,N}|T], AllOptions, Opts) -> 479 add_opts(T, AllOptions, Opts#add_opts{chunk_size=N}); 480add_opts([{atime,Value}|T], AllOptions, Opts) -> 481 add_opts(T, AllOptions, Opts#add_opts{atime=Value}); 482add_opts([{mtime,Value}|T], AllOptions, Opts) -> 483 add_opts(T, AllOptions, Opts#add_opts{mtime=Value}); 484add_opts([{ctime,Value}|T], AllOptions, Opts) -> 485 add_opts(T, AllOptions, Opts#add_opts{ctime=Value}); 486add_opts([{uid,Value}|T], AllOptions, Opts) -> 487 add_opts(T, AllOptions, Opts#add_opts{uid=Value}); 488add_opts([{gid,Value}|T], AllOptions, Opts) -> 489 add_opts(T, AllOptions, Opts#add_opts{gid=Value}); 490add_opts([_|T], AllOptions, Opts) -> 491 add_opts(T, AllOptions, Opts); 492add_opts([], _AllOptions, Opts) -> 493 Opts. 494 495apply_file_info_opts(Opts, {ok, FileInfo}) -> 496 {ok, do_apply_file_info_opts(Opts, FileInfo)}; 497apply_file_info_opts(_Opts, Other) -> 498 Other. 499 500do_apply_file_info_opts([{atime,Value}|T], FileInfo) -> 501 do_apply_file_info_opts(T, FileInfo#file_info{atime=Value}); 502do_apply_file_info_opts([{mtime,Value}|T], FileInfo) -> 503 do_apply_file_info_opts(T, FileInfo#file_info{mtime=Value}); 504do_apply_file_info_opts([{ctime,Value}|T], FileInfo) -> 505 do_apply_file_info_opts(T, FileInfo#file_info{ctime=Value}); 506do_apply_file_info_opts([{uid,Value}|T], FileInfo) -> 507 do_apply_file_info_opts(T, FileInfo#file_info{uid=Value}); 508do_apply_file_info_opts([{gid,Value}|T], FileInfo) -> 509 do_apply_file_info_opts(T, FileInfo#file_info{gid=Value}); 510do_apply_file_info_opts([_|T], FileInfo) -> 511 do_apply_file_info_opts(T, FileInfo); 512do_apply_file_info_opts([], FileInfo) -> 513 FileInfo. 514 515apply_file_info_opts_fun(Options, InfoFunction) -> 516 fun(F) -> 517 apply_file_info_opts(Options, file:InfoFunction(F, [{time, posix}])) 518 end. 519 520add1(#reader{}=Reader, Name, NameInArchive, #add_opts{read_info=ReadInfo}=Opts) 521 when is_list(Name) -> 522 Res = case ReadInfo(Name) of 523 {error, Reason0} -> 524 {error, {Name, Reason0}}; 525 {ok, #file_info{type=symlink}=Fi} -> 526 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 527 {ok, Linkname} = file:read_link(Name), 528 Header = fileinfo_to_header(NameInArchive, Fi, Linkname), 529 add_header(Reader, Header, Opts); 530 {ok, #file_info{type=regular}=Fi} -> 531 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 532 Header = fileinfo_to_header(NameInArchive, Fi, false), 533 {ok, Reader2} = add_header(Reader, Header, Opts), 534 FileSize = Header#tar_header.size, 535 {ok, FileSize, Reader3} = do_copy(Reader2, Name, Opts), 536 Padding = skip_padding(FileSize), 537 Pad = <<0:Padding/unit:8>>, 538 do_write(Reader3, Pad); 539 {ok, #file_info{type=directory}=Fi} -> 540 add_directory(Reader, Name, NameInArchive, Fi, Opts); 541 {ok, #file_info{}=Fi} -> 542 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 543 Header = fileinfo_to_header(NameInArchive, Fi, false), 544 add_header(Reader, Header, Opts) 545 end, 546 case Res of 547 ok -> ok; 548 {ok, _Reader} -> ok; 549 {error, _Reason} = Err -> Err 550 end; 551add1(Reader, Bin, NameInArchive, Opts) when is_binary(Bin) -> 552 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 553 Now = os:system_time(seconds), 554 Header = #tar_header{ 555 name = NameInArchive, 556 size = byte_size(Bin), 557 typeflag = ?TYPE_REGULAR, 558 atime = add_opts_time(Opts#add_opts.atime, Now), 559 mtime = add_opts_time(Opts#add_opts.mtime, Now), 560 ctime = add_opts_time(Opts#add_opts.ctime, Now), 561 uid = Opts#add_opts.uid, 562 gid = Opts#add_opts.gid, 563 mode = 8#100644}, 564 {ok, Reader2} = add_header(Reader, Header, Opts), 565 Padding = skip_padding(byte_size(Bin)), 566 Data = [Bin, <<0:Padding/unit:8>>], 567 case do_write(Reader2, Data) of 568 {ok, _Reader3} -> ok; 569 {error, Reason} -> {error, {NameInArchive, Reason}} 570 end. 571 572add_opts_time(undefined, Now) -> Now; 573add_opts_time(Time, _Now) -> Time. 574 575add_directory(Reader, DirName, NameInArchive, Info, Opts) -> 576 case file:list_dir(DirName) of 577 {ok, []} -> 578 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 579 Header = fileinfo_to_header(NameInArchive, Info, false), 580 add_header(Reader, Header, Opts); 581 {ok, Files} -> 582 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 583 try add_files(Reader, Files, DirName, NameInArchive, Opts) of 584 ok -> ok; 585 {error, _} = Err -> Err 586 catch 587 throw:{error, {_Name, _Reason}} = Err -> Err; 588 throw:{error, Reason} -> {error, {DirName, Reason}} 589 end; 590 {error, Reason} -> 591 {error, {DirName, Reason}} 592 end. 593 594add_files(_Reader, [], _Dir, _DirInArchive, _Opts) -> 595 ok; 596add_files(Reader, [Name|Rest], Dir, DirInArchive, #add_opts{read_info=Info}=Opts) -> 597 FullName = filename:join(Dir, Name), 598 NameInArchive = filename:join(DirInArchive, Name), 599 Res = case Info(FullName) of 600 {error, Reason} -> 601 {error, {FullName, Reason}}; 602 {ok, #file_info{type=directory}=Fi} -> 603 add_directory(Reader, FullName, NameInArchive, Fi, Opts); 604 {ok, #file_info{type=symlink}=Fi} -> 605 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 606 {ok, Linkname} = file:read_link(FullName), 607 Header = fileinfo_to_header(NameInArchive, Fi, Linkname), 608 add_header(Reader, Header, Opts); 609 {ok, #file_info{type=regular}=Fi} -> 610 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 611 Header = fileinfo_to_header(NameInArchive, Fi, false), 612 {ok, Reader2} = add_header(Reader, Header, Opts), 613 FileSize = Header#tar_header.size, 614 {ok, FileSize, Reader3} = do_copy(Reader2, FullName, Opts), 615 Padding = skip_padding(FileSize), 616 Pad = <<0:Padding/unit:8>>, 617 do_write(Reader3, Pad); 618 {ok, #file_info{}=Fi} -> 619 add_verbose(Opts, "a ~ts~n", [NameInArchive]), 620 Header = fileinfo_to_header(NameInArchive, Fi, false), 621 add_header(Reader, Header, Opts) 622 end, 623 case Res of 624 ok -> add_files(Reader, Rest, Dir, DirInArchive, Opts); 625 {ok, ReaderNext} -> add_files(ReaderNext, Rest, Dir, DirInArchive, Opts); 626 {error, _} = Err -> Err 627 end. 628 629format_string(String, Size) when length(String) > Size -> 630 throw({error, {write_string, field_too_long}}); 631format_string(String, Size) -> 632 Ascii = to_ascii(String), 633 if byte_size(Ascii) < Size -> 634 [Ascii, 0]; 635 true -> 636 Ascii 637 end. 638 639format_octal(Octal) -> 640 iolist_to_binary(io_lib:fwrite("~.8B", [Octal])). 641 642add_header(#reader{}=Reader, #tar_header{}=Header, Opts) -> 643 {ok, Iodata} = build_header(Header, Opts), 644 do_write(Reader, Iodata). 645 646write_to_block(Block, IoData, Start) when is_list(IoData) -> 647 write_to_block(Block, iolist_to_binary(IoData), Start); 648write_to_block(Block, Bin, Start) when is_binary(Bin) -> 649 Size = byte_size(Bin), 650 <<Head:Start/unit:8, _:Size/unit:8, Rest/binary>> = Block, 651 <<Head:Start/unit:8, Bin/binary, Rest/binary>>. 652 653build_header(#tar_header{}=Header, Opts) -> 654 #tar_header{ 655 name=Name, 656 mode=Mode, 657 uid=Uid, 658 gid=Gid, 659 size=Size, 660 typeflag=Type, 661 linkname=Linkname, 662 uname=Uname, 663 gname=Gname, 664 devmajor=Devmaj, 665 devminor=Devmin 666 } = Header, 667 Mtime = Header#tar_header.mtime, 668 669 Block0 = ?ZERO_BLOCK, 670 {Block1, Pax0} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, Name, ?PAX_PATH, #{}), 671 Block2 = write_octal(Block1, ?V7_MODE, ?V7_MODE_LEN, Mode), 672 {Block3, Pax1} = write_numeric(Block2, ?V7_UID, ?V7_UID_LEN, Uid, ?PAX_UID, Pax0), 673 {Block4, Pax2} = write_numeric(Block3, ?V7_GID, ?V7_GID_LEN, Gid, ?PAX_GID, Pax1), 674 {Block5, Pax3} = write_numeric(Block4, ?V7_SIZE, ?V7_SIZE_LEN, Size, ?PAX_SIZE, Pax2), 675 {Block6, Pax4} = write_numeric(Block5, ?V7_MTIME, ?V7_MTIME_LEN, Mtime, ?PAX_NONE, Pax3), 676 {Block7, Pax5} = write_string(Block6, ?V7_TYPE, ?V7_TYPE_LEN, <<Type>>, ?PAX_NONE, Pax4), 677 {Block8, Pax6} = write_string(Block7, ?V7_LINKNAME, ?V7_LINKNAME_LEN, 678 Linkname, ?PAX_LINKPATH, Pax5), 679 {Block9, Pax7} = write_string(Block8, ?USTAR_UNAME, ?USTAR_UNAME_LEN, 680 Uname, ?PAX_UNAME, Pax6), 681 {Block10, Pax8} = write_string(Block9, ?USTAR_GNAME, ?USTAR_GNAME_LEN, 682 Gname, ?PAX_GNAME, Pax7), 683 {Block11, Pax9} = write_numeric(Block10, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN, 684 Devmaj, ?PAX_NONE, Pax8), 685 {Block12, Pax10} = write_numeric(Block11, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN, 686 Devmin, ?PAX_NONE, Pax9), 687 {Block13, Pax11} = set_path(Block12, Pax10), 688 PaxEntry = case maps:size(Pax11) of 689 0 -> []; 690 _ -> build_pax_entry(Header, Pax11, Opts) 691 end, 692 Block14 = set_format(Block13, ?FORMAT_USTAR), 693 Block15 = set_checksum(Block14), 694 {ok, [PaxEntry, Block15]}. 695 696set_path(Block0, Pax) -> 697 %% only use ustar header when name is too long 698 case maps:get(?PAX_PATH, Pax, nil) of 699 nil -> 700 {Block0, Pax}; 701 PaxPath -> 702 case split_ustar_path(PaxPath) of 703 {ok, UstarName, UstarPrefix} -> 704 {Block1, _} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, 705 UstarName, ?PAX_NONE, #{}), 706 {Block2, _} = write_string(Block1, ?USTAR_PREFIX, ?USTAR_PREFIX_LEN, 707 UstarPrefix, ?PAX_NONE, #{}), 708 {Block2, maps:remove(?PAX_PATH, Pax)}; 709 false -> 710 {Block0, Pax} 711 end 712 end. 713 714set_format(Block0, Format) 715 when Format =:= ?FORMAT_USTAR; Format =:= ?FORMAT_PAX -> 716 Block1 = write_to_block(Block0, ?MAGIC_USTAR, ?USTAR_MAGIC), 717 write_to_block(Block1, ?VERSION_USTAR, ?USTAR_VERSION); 718set_format(_Block, Format) -> 719 throw({error, {invalid_format, Format}}). 720 721set_checksum(Block) -> 722 Checksum = compute_checksum(Block), 723 write_octal(Block, ?V7_CHKSUM, ?V7_CHKSUM_LEN, Checksum). 724 725build_pax_entry(Header, PaxAttrs, Opts) -> 726 Path = Header#tar_header.name, 727 Filename = filename:basename(Path), 728 Dir = filename:dirname(Path), 729 Path2 = filename:join([Dir, "PaxHeaders.0", Filename]), 730 AsciiPath = to_ascii(Path2), 731 Path3 = if byte_size(AsciiPath) > ?V7_NAME_LEN -> 732 binary_part(AsciiPath, 0, ?V7_NAME_LEN - 1); 733 true -> 734 AsciiPath 735 end, 736 Keys = maps:keys(PaxAttrs), 737 SortedKeys = lists:sort(Keys), 738 PaxFile = build_pax_file(SortedKeys, PaxAttrs), 739 Size = byte_size(PaxFile), 740 Padding = (?BLOCK_SIZE - 741 (byte_size(PaxFile) rem ?BLOCK_SIZE)) rem ?BLOCK_SIZE, 742 Pad = <<0:Padding/unit:8>>, 743 PaxHeader = #tar_header{ 744 name=unicode:characters_to_list(Path3), 745 size=Size, 746 mtime=Header#tar_header.mtime, 747 atime=Header#tar_header.atime, 748 ctime=Header#tar_header.ctime, 749 typeflag=?TYPE_X_HEADER 750 }, 751 {ok, PaxHeaderData} = build_header(PaxHeader, Opts), 752 [PaxHeaderData, PaxFile, Pad]. 753 754build_pax_file(Keys, PaxAttrs) -> 755 build_pax_file(Keys, PaxAttrs, []). 756build_pax_file([], _, Acc) -> 757 unicode:characters_to_binary(Acc); 758build_pax_file([K|Rest], Attrs, Acc) -> 759 V = maps:get(K, Attrs), 760 Size = sizeof(K) + sizeof(V) + 3, 761 Size2 = sizeof(Size) + Size, 762 Key = to_string(K), 763 Value = to_string(V), 764 Record = unicode:characters_to_binary(io_lib:format("~B ~ts=~ts\n", [Size2, Key, Value])), 765 if byte_size(Record) =/= Size2 -> 766 Size3 = byte_size(Record), 767 Record2 = io_lib:format("~B ~ts=~ts\n", [Size3, Key, Value]), 768 build_pax_file(Rest, Attrs, [Acc, Record2]); 769 true -> 770 build_pax_file(Rest, Attrs, [Acc, Record]) 771 end. 772 773sizeof(Bin) when is_binary(Bin) -> 774 byte_size(Bin); 775sizeof(List) when is_list(List) -> 776 length(List); 777sizeof(N) when is_integer(N) -> 778 byte_size(integer_to_binary(N)); 779sizeof(N) when is_float(N) -> 780 byte_size(float_to_binary(N)). 781 782to_string(Bin) when is_binary(Bin) -> 783 unicode:characters_to_list(Bin); 784to_string(List) when is_list(List) -> 785 List; 786to_string(N) when is_integer(N) -> 787 integer_to_list(N); 788to_string(N) when is_float(N) -> 789 float_to_list(N). 790 791split_ustar_path(Path) -> 792 Len = length(Path), 793 NotAscii = not is_ascii(Path), 794 if Len =< ?V7_NAME_LEN; NotAscii -> 795 false; 796 true -> 797 PathBin = binary:list_to_bin(Path), 798 case binary:split(PathBin, [<<$/>>], [global, trim_all]) of 799 [Part] when byte_size(Part) >= ?V7_NAME_LEN -> 800 false; 801 Parts -> 802 case lists:last(Parts) of 803 Name when byte_size(Name) >= ?V7_NAME_LEN -> 804 false; 805 Name -> 806 Parts2 = lists:sublist(Parts, length(Parts) - 1), 807 join_split_ustar_path(Parts2, {ok, Name, nil}) 808 end 809 end 810 end. 811 812join_split_ustar_path([], Acc) -> 813 Acc; 814join_split_ustar_path([Part|_], {ok, _, nil}) 815 when byte_size(Part) > ?USTAR_PREFIX_LEN -> 816 false; 817join_split_ustar_path([Part|_], {ok, _Name, Acc}) 818 when (byte_size(Part)+byte_size(Acc)) > ?USTAR_PREFIX_LEN -> 819 false; 820join_split_ustar_path([Part|Rest], {ok, Name, nil}) -> 821 join_split_ustar_path(Rest, {ok, Name, Part}); 822join_split_ustar_path([Part|Rest], {ok, Name, Acc}) -> 823 join_split_ustar_path(Rest, {ok, Name, <<Acc/binary,$/,Part/binary>>}). 824 825write_octal(Block, Pos, Size, X) -> 826 Octal = zero_pad(format_octal(X), Size-1), 827 if byte_size(Octal) < Size -> 828 write_to_block(Block, Octal, Pos); 829 true -> 830 throw({error, {write_failed, octal_field_too_long}}) 831 end. 832 833write_string(Block, Pos, Size, Str, PaxAttr, Pax0) -> 834 NotAscii = not is_ascii(Str), 835 if PaxAttr =/= ?PAX_NONE andalso (length(Str) > Size orelse NotAscii) -> 836 Pax1 = maps:put(PaxAttr, Str, Pax0), 837 {Block, Pax1}; 838 true -> 839 Formatted = format_string(Str, Size), 840 {write_to_block(Block, Formatted, Pos), Pax0} 841 end. 842write_numeric(Block, Pos, Size, X, PaxAttr, Pax0) -> 843 %% attempt octal 844 Octal = zero_pad(format_octal(X), Size-1), 845 if byte_size(Octal) < Size -> 846 {write_to_block(Block, [Octal, 0], Pos), Pax0}; 847 PaxAttr =/= ?PAX_NONE -> 848 Pax1 = maps:put(PaxAttr, X, Pax0), 849 {Block, Pax1}; 850 true -> 851 throw({error, {write_failed, numeric_field_too_long}}) 852 end. 853 854zero_pad(Str, Size) when byte_size(Str) >= Size -> 855 Str; 856zero_pad(Str, Size) -> 857 Padding = Size - byte_size(Str), 858 Pad = binary:copy(<<$0>>, Padding), 859 <<Pad/binary, Str/binary>>. 860 861 862%%%================================================================ 863%% Functions for creating or modifying tar archives 864 865read_block(Reader) -> 866 case do_read(Reader, ?BLOCK_SIZE) of 867 eof -> 868 throw({error, eof}); 869 %% Two zero blocks mark the end of the archive 870 {ok, ?ZERO_BLOCK, Reader1} -> 871 case do_read(Reader1, ?BLOCK_SIZE) of 872 eof -> 873 % This is technically a malformed end-of-archive marker, 874 % as two ZERO_BLOCKs are expected as the marker, 875 % but if we've already made it this far, we should just ignore it 876 eof; 877 {ok, ?ZERO_BLOCK, _Reader2} -> 878 eof; 879 {ok, _Block, _Reader2} -> 880 throw({error, invalid_end_of_archive}); 881 {error,_} = Err -> 882 throw(Err) 883 end; 884 {ok, Block, Reader1} when is_binary(Block) -> 885 {ok, Block, Reader1}; 886 {error, _} = Err -> 887 throw(Err) 888 end. 889 890get_header(#reader{}=Reader) -> 891 case read_block(Reader) of 892 eof -> 893 eof; 894 {ok, Block, Reader1} -> 895 convert_header(Block, Reader1) 896 end. 897 898%% Converts the tar header to a record. 899to_v7(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 900 #header_v7{ 901 name=binary_part(Bin, ?V7_NAME, ?V7_NAME_LEN), 902 mode=binary_part(Bin, ?V7_MODE, ?V7_MODE_LEN), 903 uid=binary_part(Bin, ?V7_UID, ?V7_UID_LEN), 904 gid=binary_part(Bin, ?V7_GID, ?V7_GID_LEN), 905 size=binary_part(Bin, ?V7_SIZE, ?V7_SIZE_LEN), 906 mtime=binary_part(Bin, ?V7_MTIME, ?V7_MTIME_LEN), 907 checksum=binary_part(Bin, ?V7_CHKSUM, ?V7_CHKSUM_LEN), 908 typeflag=binary:at(Bin, ?V7_TYPE), 909 linkname=binary_part(Bin, ?V7_LINKNAME, ?V7_LINKNAME_LEN) 910 }; 911to_v7(_) -> 912 {error, header_block_too_small}. 913 914to_gnu(#header_v7{}=V7, Bin) 915 when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 916 #header_gnu{ 917 header_v7=V7, 918 magic=binary_part(Bin, ?GNU_MAGIC, ?GNU_MAGIC_LEN), 919 version=binary_part(Bin, ?GNU_VERSION, ?GNU_VERSION_LEN), 920 uname=binary_part(Bin, 265, 32), 921 gname=binary_part(Bin, 297, 32), 922 devmajor=binary_part(Bin, 329, 8), 923 devminor=binary_part(Bin, 337, 8), 924 atime=binary_part(Bin, 345, 12), 925 ctime=binary_part(Bin, 357, 12), 926 sparse=to_sparse_array(binary_part(Bin, 386, 24*4+1)), 927 real_size=binary_part(Bin, 483, 12) 928 }. 929 930to_star(#header_v7{}=V7, Bin) 931 when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 932 #header_star{ 933 header_v7=V7, 934 magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN), 935 version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN), 936 uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN), 937 gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN), 938 devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN), 939 devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN), 940 prefix=binary_part(Bin, 345, 131), 941 atime=binary_part(Bin, 476, 12), 942 ctime=binary_part(Bin, 488, 12), 943 trailer=binary_part(Bin, ?STAR_TRAILER, ?STAR_TRAILER_LEN) 944 }. 945 946to_ustar(#header_v7{}=V7, Bin) 947 when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 948 #header_ustar{ 949 header_v7=V7, 950 magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN), 951 version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN), 952 uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN), 953 gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN), 954 devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN), 955 devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN), 956 prefix=binary_part(Bin, 345, 155) 957 }. 958 959to_sparse_array(Bin) when is_binary(Bin) -> 960 MaxEntries = byte_size(Bin) div 24, 961 IsExtended = 1 =:= binary:at(Bin, 24*MaxEntries), 962 Entries = parse_sparse_entries(Bin, MaxEntries-1, []), 963 #sparse_array{ 964 entries=Entries, 965 max_entries=MaxEntries, 966 is_extended=IsExtended 967 }. 968 969parse_sparse_entries(<<>>, _, Acc) -> 970 Acc; 971parse_sparse_entries(_, -1, Acc) -> 972 Acc; 973parse_sparse_entries(Bin, N, Acc) -> 974 case to_sparse_entry(binary_part(Bin, N*24, 24)) of 975 nil -> 976 parse_sparse_entries(Bin, N-1, Acc); 977 Entry = #sparse_entry{} -> 978 parse_sparse_entries(Bin, N-1, [Entry|Acc]) 979 end. 980 981-define(EMPTY_ENTRY, <<0,0,0,0,0,0,0,0,0,0,0,0>>). 982to_sparse_entry(Bin) when is_binary(Bin), byte_size(Bin) =:= 24 -> 983 OffsetBin = binary_part(Bin, 0, 12), 984 NumBytesBin = binary_part(Bin, 12, 12), 985 case {OffsetBin, NumBytesBin} of 986 {?EMPTY_ENTRY, ?EMPTY_ENTRY} -> 987 nil; 988 _ -> 989 #sparse_entry{ 990 offset=parse_numeric(OffsetBin), 991 num_bytes=parse_numeric(NumBytesBin)} 992 end. 993 994-spec get_format(binary()) -> {ok, pos_integer(), header_v7()} 995 | ?FORMAT_UNKNOWN 996 | {error, term()}. 997get_format(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 998 do_get_format(to_v7(Bin), Bin). 999 1000do_get_format({error, _} = Err, _Bin) -> 1001 Err; 1002do_get_format(#header_v7{}=V7, Bin) 1003 when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 1004 Checksum = parse_octal(V7#header_v7.checksum), 1005 Chk1 = compute_checksum(Bin), 1006 Chk2 = compute_signed_checksum(Bin), 1007 if Checksum =/= Chk1 andalso Checksum =/= Chk2 -> 1008 ?FORMAT_UNKNOWN; 1009 true -> 1010 %% guess magic 1011 Ustar = to_ustar(V7, Bin), 1012 Star = to_star(V7, Bin), 1013 Magic = Ustar#header_ustar.magic, 1014 Version = Ustar#header_ustar.version, 1015 Trailer = Star#header_star.trailer, 1016 Format = if 1017 Magic =:= ?MAGIC_USTAR, Trailer =:= ?TRAILER_STAR -> 1018 ?FORMAT_STAR; 1019 Magic =:= ?MAGIC_USTAR -> 1020 ?FORMAT_USTAR; 1021 Magic =:= ?MAGIC_GNU, Version =:= ?VERSION_GNU -> 1022 ?FORMAT_GNU; 1023 true -> 1024 ?FORMAT_V7 1025 end, 1026 {ok, Format, V7} 1027 end. 1028 1029unpack_format(Format, #header_v7{}=V7, Bin, Reader) 1030 when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> 1031 Mtime = parse_numeric(V7#header_v7.mtime), 1032 Header0 = #tar_header{ 1033 name=parse_string(V7#header_v7.name), 1034 mode=parse_numeric(V7#header_v7.mode), 1035 uid=parse_numeric(V7#header_v7.uid), 1036 gid=parse_numeric(V7#header_v7.gid), 1037 size=parse_numeric(V7#header_v7.size), 1038 mtime=Mtime, 1039 atime=Mtime, 1040 ctime=Mtime, 1041 typeflag=V7#header_v7.typeflag, 1042 linkname=parse_string(V7#header_v7.linkname) 1043 }, 1044 Typeflag = Header0#tar_header.typeflag, 1045 Header1 = if Format > ?FORMAT_V7 -> 1046 unpack_modern(Format, V7, Bin, Header0); 1047 true -> 1048 Name = Header0#tar_header.name, 1049 Header0#tar_header{name=safe_join_path("", Name)} 1050 end, 1051 HeaderOnly = is_header_only_type(Typeflag), 1052 Header2 = if HeaderOnly -> 1053 Header1#tar_header{size=0}; 1054 true -> 1055 Header1 1056 end, 1057 if Typeflag =:= ?TYPE_GNU_SPARSE -> 1058 Gnu = to_gnu(V7, Bin), 1059 RealSize = parse_numeric(Gnu#header_gnu.real_size), 1060 {Sparsemap, Reader2} = parse_sparse_map(Gnu, Reader), 1061 Header3 = Header2#tar_header{size=RealSize}, 1062 {Header3, new_sparse_file_reader(Reader2, Sparsemap, RealSize)}; 1063 true -> 1064 FileReader = #reg_file_reader{ 1065 handle=Reader, 1066 num_bytes=Header2#tar_header.size, 1067 size=Header2#tar_header.size, 1068 pos = 0 1069 }, 1070 {Header2, FileReader} 1071 end. 1072 1073unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0) 1074 when is_binary(Bin) -> 1075 Typeflag = Header0#tar_header.typeflag, 1076 Ustar = to_ustar(V7, Bin), 1077 H0 = Header0#tar_header{ 1078 uname=parse_string(Ustar#header_ustar.uname), 1079 gname=parse_string(Ustar#header_ustar.gname)}, 1080 H1 = if Typeflag =:= ?TYPE_CHAR 1081 orelse Typeflag =:= ?TYPE_BLOCK -> 1082 Ma = parse_numeric(Ustar#header_ustar.devmajor), 1083 Mi = parse_numeric(Ustar#header_ustar.devminor), 1084 H0#tar_header{ 1085 devmajor=Ma, 1086 devminor=Mi 1087 }; 1088 true -> 1089 H0 1090 end, 1091 {Prefix, H2} = case Format of 1092 ?FORMAT_USTAR -> 1093 {parse_string(Ustar#header_ustar.prefix), H1}; 1094 ?FORMAT_STAR -> 1095 Star = to_star(V7, Bin), 1096 Prefix0 = parse_string(Star#header_star.prefix), 1097 Atime0 = Star#header_star.atime, 1098 Atime = parse_numeric(Atime0), 1099 Ctime0 = Star#header_star.ctime, 1100 Ctime = parse_numeric(Ctime0), 1101 {Prefix0, H1#tar_header{ 1102 atime=Atime, 1103 ctime=Ctime 1104 }}; 1105 _ -> 1106 {"", H1} 1107 end, 1108 Name = H2#tar_header.name, 1109 H2#tar_header{name=safe_join_path(Prefix, Name)}. 1110 1111 1112safe_join_path([], Name) -> 1113 filename:join([Name]); 1114safe_join_path(Prefix, []) -> 1115 filename:join([Prefix]); 1116safe_join_path(Prefix, Name) -> 1117 filename:join(Prefix, Name). 1118 1119new_sparse_file_reader(Reader, Sparsemap, RealSize) -> 1120 true = validate_sparse_entries(Sparsemap, RealSize), 1121 #sparse_file_reader{ 1122 handle = Reader, 1123 num_bytes = RealSize, 1124 pos = 0, 1125 size = RealSize, 1126 sparse_map = Sparsemap}. 1127 1128validate_sparse_entries(Entries, RealSize) -> 1129 validate_sparse_entries(Entries, RealSize, 0, 0). 1130validate_sparse_entries([], _RealSize, _I, _LastOffset) -> 1131 true; 1132validate_sparse_entries([#sparse_entry{}=Entry|Rest], RealSize, I, LastOffset) -> 1133 Offset = Entry#sparse_entry.offset, 1134 NumBytes = Entry#sparse_entry.num_bytes, 1135 if 1136 Offset > ?MAX_INT64-NumBytes -> 1137 throw({error, {invalid_sparse_map_entry, offset_too_large}}); 1138 Offset+NumBytes > RealSize -> 1139 throw({error, {invalid_sparse_map_entry, offset_too_large}}); 1140 I > 0 andalso LastOffset > Offset -> 1141 throw({error, {invalid_sparse_map_entry, overlapping_offsets}}); 1142 true -> 1143 ok 1144 end, 1145 validate_sparse_entries(Rest, RealSize, I+1, Offset+NumBytes). 1146 1147 1148-spec parse_sparse_map(header_gnu(), descriptor_type()) -> 1149 {[sparse_entry()], descriptor_type()}. 1150parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) 1151 when Sparse#sparse_array.is_extended -> 1152 parse_sparse_map(Sparse, Reader, []); 1153parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) -> 1154 {Sparse#sparse_array.entries, Reader}. 1155parse_sparse_map(#sparse_array{is_extended=true,entries=Entries}, Reader, Acc) -> 1156 case read_block(Reader) of 1157 eof -> 1158 throw({error, eof}); 1159 {ok, Block, Reader2} -> 1160 Sparse2 = to_sparse_array(Block), 1161 parse_sparse_map(Sparse2, Reader2, Entries++Acc) 1162 end; 1163parse_sparse_map(#sparse_array{entries=Entries}, Reader, Acc) -> 1164 Sorted = lists:sort(fun (#sparse_entry{offset=A},#sparse_entry{offset=B}) -> 1165 A =< B 1166 end, Entries++Acc), 1167 {Sorted, Reader}. 1168 1169%% Defined by taking the sum of the unsigned byte values of the 1170%% entire header record, treating the checksum bytes to as ASCII spaces 1171compute_checksum(<<H1:?V7_CHKSUM/binary, 1172 H2:?V7_CHKSUM_LEN/binary, 1173 Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary, 1174 _/binary>>) -> 1175 C0 = checksum(H1) + (byte_size(H2) * $\s), 1176 C1 = checksum(Rest), 1177 C0 + C1. 1178 1179compute_signed_checksum(<<H1:?V7_CHKSUM/binary, 1180 H2:?V7_CHKSUM_LEN/binary, 1181 Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary, 1182 _/binary>>) -> 1183 C0 = signed_checksum(H1) + (byte_size(H2) * $\s), 1184 C1 = signed_checksum(Rest), 1185 C0 + C1. 1186 1187%% Returns the checksum of a binary. 1188checksum(Bin) -> checksum(Bin, 0). 1189checksum(<<A/unsigned,Rest/binary>>, Sum) -> 1190 checksum(Rest, Sum+A); 1191checksum(<<>>, Sum) -> Sum. 1192 1193signed_checksum(Bin) -> signed_checksum(Bin, 0). 1194signed_checksum(<<A/signed,Rest/binary>>, Sum) -> 1195 signed_checksum(Rest, Sum+A); 1196signed_checksum(<<>>, Sum) -> Sum. 1197 1198-spec parse_numeric(binary()) -> non_neg_integer(). 1199parse_numeric(<<>>) -> 1200 0; 1201parse_numeric(<<First, _/binary>> = Bin) -> 1202 %% check for base-256 format first 1203 %% if the bit is set, then all following bits constitute a two's 1204 %% complement encoded number in big-endian byte order 1205 if 1206 First band 16#80 =/= 0 -> 1207 %% Handling negative numbers relies on the following identity: 1208 %% -a-1 == ^a 1209 %% If the number is negative, we use an inversion mask to invert 1210 %% the data bytes and treat the value as an unsigned number 1211 Inv = if First band 16#40 =/= 0 -> 16#00; true -> 16#FF end, 1212 Bytes = binary:bin_to_list(Bin), 1213 Reducer = fun (C, {I, X}) -> 1214 C1 = C bxor Inv, 1215 C2 = if I =:= 0 -> C1 band 16#7F; true -> C1 end, 1216 if (X bsr 56) > 0 -> 1217 throw({error,integer_overflow}); 1218 true -> 1219 {I+1, (X bsl 8) bor C2} 1220 end 1221 end, 1222 {_, N} = lists:foldl(Reducer, {0,0}, Bytes), 1223 if (N bsr 63) > 0 -> 1224 throw({error, integer_overflow}); 1225 true -> 1226 if Inv =:= 16#FF -> 1227 -1 bxor N; 1228 true -> 1229 N 1230 end 1231 end; 1232 true -> 1233 %% normal case is an octal number 1234 parse_octal(Bin) 1235 end. 1236 1237parse_octal(Bin) when is_binary(Bin) -> 1238 %% skip leading/trailing zero bytes and spaces 1239 do_parse_octal(Bin, <<>>). 1240do_parse_octal(<<>>, <<>>) -> 1241 0; 1242do_parse_octal(<<>>, Acc) -> 1243 case io_lib:fread("~8u", binary:bin_to_list(Acc)) of 1244 {error, _} -> throw({error, invalid_tar_checksum}); 1245 {ok, [Octal], []} -> Octal; 1246 {ok, _, _} -> throw({error, invalid_tar_checksum}) 1247 end; 1248do_parse_octal(<<$\s,Rest/binary>>, Acc) -> 1249 do_parse_octal(Rest, Acc); 1250do_parse_octal(<<0, Rest/binary>>, Acc) -> 1251 do_parse_octal(Rest, Acc); 1252do_parse_octal(<<C, Rest/binary>>, Acc) -> 1253 do_parse_octal(Rest, <<Acc/binary, C>>). 1254 1255parse_string(Bin) when is_binary(Bin) -> 1256 do_parse_string(Bin, <<>>). 1257do_parse_string(<<>>, Acc) -> 1258 case unicode:characters_to_list(Acc) of 1259 Str when is_list(Str) -> 1260 Str; 1261 {incomplete, _Str, _Rest} -> 1262 binary:bin_to_list(Acc); 1263 {error, _Str, _Rest} -> 1264 throw({error, {bad_header, invalid_string}}) 1265 end; 1266do_parse_string(<<0, _/binary>>, Acc) -> 1267 do_parse_string(<<>>, Acc); 1268do_parse_string(<<C, Rest/binary>>, Acc) -> 1269 do_parse_string(Rest, <<Acc/binary, C>>). 1270 1271convert_header(Bin, #reader{pos=Pos}=Reader) 1272 when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 -> 1273 case get_format(Bin) of 1274 ?FORMAT_UNKNOWN -> 1275 throw({error, bad_header}); 1276 {ok, Format, V7} -> 1277 unpack_format(Format, V7, Bin, Reader); 1278 {error, Reason} -> 1279 throw({error, {bad_header, Reason}}) 1280 end; 1281convert_header(Bin, #reader{pos=Pos}) when byte_size(Bin) =:= ?BLOCK_SIZE -> 1282 throw({error, misaligned_read, Pos}); 1283convert_header(Bin, _Reader) when byte_size(Bin) =:= 0 -> 1284 eof; 1285convert_header(_Bin, _Reader) -> 1286 throw({error, eof}). 1287 1288%% Creates a partially-populated header record based 1289%% on the provided file_info record. If the file is 1290%% a symlink, then `link` is used as the link target. 1291%% If the file is a directory, a slash is appended to the name. 1292fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) -> 1293 BaseHeader = #tar_header{name=Name, 1294 mtime=Fi#file_info.mtime, 1295 atime=Fi#file_info.atime, 1296 ctime=Fi#file_info.ctime, 1297 mode=Fi#file_info.mode, 1298 uid=Fi#file_info.uid, 1299 gid=Fi#file_info.gid, 1300 typeflag=?TYPE_REGULAR}, 1301 do_fileinfo_to_header(BaseHeader, Fi, Link). 1302 1303do_fileinfo_to_header(Header, #file_info{size=Size,type=regular}, _Link) -> 1304 Header#tar_header{size=Size,typeflag=?TYPE_REGULAR}; 1305do_fileinfo_to_header(#tar_header{name=Name}=Header, 1306 #file_info{type=directory}, _Link) -> 1307 Header#tar_header{name=Name++"/",typeflag=?TYPE_DIR}; 1308do_fileinfo_to_header(Header, #file_info{type=symlink}, Link) -> 1309 Header#tar_header{typeflag=?TYPE_SYMLINK,linkname=Link}; 1310do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link) 1311 when (Mode band ?S_IFMT) =:= ?S_IFCHR -> 1312 Header#tar_header{typeflag=?TYPE_CHAR, 1313 devmajor=Fi#file_info.major_device, 1314 devminor=Fi#file_info.minor_device}; 1315do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link) 1316 when (Mode band ?S_IFMT) =:= ?S_IFBLK -> 1317 Header#tar_header{typeflag=?TYPE_BLOCK, 1318 devmajor=Fi#file_info.major_device, 1319 devminor=Fi#file_info.minor_device}; 1320do_fileinfo_to_header(Header, #file_info{type=other,mode=Mode}, _Link) 1321 when (Mode band ?S_IFMT) =:= ?S_FIFO -> 1322 Header#tar_header{typeflag=?TYPE_FIFO}; 1323do_fileinfo_to_header(Header, Fi, _Link) -> 1324 {error, {invalid_file_type, Header#tar_header.name, Fi}}. 1325 1326is_ascii(Str) when is_list(Str) -> 1327 not lists:any(fun (Char) -> Char >= 16#80 end, Str); 1328is_ascii(Bin) when is_binary(Bin) -> 1329 is_ascii1(Bin). 1330 1331is_ascii1(<<>>) -> 1332 true; 1333is_ascii1(<<C,_Rest/binary>>) when C >= 16#80 -> 1334 false; 1335is_ascii1(<<_, Rest/binary>>) -> 1336 is_ascii1(Rest). 1337 1338to_ascii(Str) when is_list(Str) -> 1339 case is_ascii(Str) of 1340 true -> 1341 unicode:characters_to_binary(Str); 1342 false -> 1343 Chars = lists:filter(fun (Char) -> Char < 16#80 end, Str), 1344 unicode:characters_to_binary(Chars) 1345 end; 1346to_ascii(Bin) when is_binary(Bin) -> 1347 to_ascii(Bin, <<>>). 1348to_ascii(<<>>, Acc) -> 1349 Acc; 1350to_ascii(<<C, Rest/binary>>, Acc) when C < 16#80 -> 1351 to_ascii(Rest, <<Acc/binary,C>>); 1352to_ascii(<<_, Rest/binary>>, Acc) -> 1353 to_ascii(Rest, Acc). 1354 1355is_header_only_type(?TYPE_SYMLINK) -> true; 1356is_header_only_type(?TYPE_LINK) -> true; 1357is_header_only_type(?TYPE_DIR) -> true; 1358is_header_only_type(_) -> false. 1359 1360foldl_read(#reader{access=read}=Reader, Fun, Accu, #read_opts{}=Opts) 1361 when is_function(Fun,4) -> 1362 case foldl_read0(Reader, Fun, Accu, Opts) of 1363 {ok, Result, _Reader2} -> 1364 Result; 1365 {error, _} = Err -> 1366 Err 1367 end; 1368foldl_read(#reader{access=Access}, _Fun, _Accu, _Opts) -> 1369 {error, {read_mode_expected, Access}}; 1370foldl_read(TarName, Fun, Accu, #read_opts{}=Opts) 1371 when is_function(Fun,4) -> 1372 try open(TarName, [read|Opts#read_opts.open_mode]) of 1373 {ok, #reader{access=read}=Reader} -> 1374 try 1375 foldl_read(Reader, Fun, Accu, Opts) 1376 after 1377 _ = close(Reader) 1378 end; 1379 {error, _} = Err -> 1380 Err 1381 catch 1382 throw:Err -> 1383 Err 1384 end. 1385 1386foldl_read0(Reader, Fun, Accu, Opts) -> 1387 try foldl_read1(Fun, Accu, Reader, Opts, #{}) of 1388 {ok,_,_} = Ok -> 1389 Ok 1390 catch 1391 throw:{error, {Reason, Format, Args}} -> 1392 read_verbose(Opts, Format, Args), 1393 {error, Reason}; 1394 throw:Err -> 1395 Err 1396 end. 1397 1398foldl_read1(Fun, Accu0, Reader0, Opts, ExtraHeaders) -> 1399 {ok, Reader1} = skip_unread(Reader0), 1400 case get_header(Reader1) of 1401 eof -> 1402 Fun(eof, Reader1, Opts, Accu0); 1403 {Header, Reader2} -> 1404 case Header#tar_header.typeflag of 1405 ?TYPE_X_HEADER -> 1406 {ExtraHeaders2, Reader3} = parse_pax(Reader2), 1407 ExtraHeaders3 = maps:merge(ExtraHeaders, ExtraHeaders2), 1408 foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders3); 1409 ?TYPE_GNU_LONGNAME -> 1410 {RealName, Reader3} = get_real_name(Reader2), 1411 ExtraHeaders2 = maps:put(?PAX_PATH, 1412 parse_string(RealName), ExtraHeaders), 1413 foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2); 1414 ?TYPE_GNU_LONGLINK -> 1415 {RealName, Reader3} = get_real_name(Reader2), 1416 ExtraHeaders2 = maps:put(?PAX_LINKPATH, 1417 parse_string(RealName), ExtraHeaders), 1418 foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2); 1419 _ -> 1420 Header1 = merge_pax(Header, ExtraHeaders), 1421 {ok, NewAccu, Reader3} = Fun(Header1, Reader2, Opts, Accu0), 1422 foldl_read1(Fun, NewAccu, Reader3, Opts, #{}) 1423 end 1424 end. 1425 1426%% Applies all known PAX attributes to the current tar header 1427-spec merge_pax(tar_header(), #{binary() => binary()}) -> tar_header(). 1428merge_pax(Header, ExtraHeaders) when is_map(ExtraHeaders) -> 1429 do_merge_pax(Header, maps:to_list(ExtraHeaders)). 1430 1431do_merge_pax(Header, []) -> 1432 Header; 1433do_merge_pax(Header, [{?PAX_PATH, Path}|Rest]) -> 1434 do_merge_pax(Header#tar_header{name=unicode:characters_to_list(Path)}, Rest); 1435do_merge_pax(Header, [{?PAX_LINKPATH, LinkPath}|Rest]) -> 1436 do_merge_pax(Header#tar_header{linkname=unicode:characters_to_list(LinkPath)}, Rest); 1437do_merge_pax(Header, [{?PAX_GNAME, Gname}|Rest]) -> 1438 do_merge_pax(Header#tar_header{gname=unicode:characters_to_list(Gname)}, Rest); 1439do_merge_pax(Header, [{?PAX_UNAME, Uname}|Rest]) -> 1440 do_merge_pax(Header#tar_header{uname=unicode:characters_to_list(Uname)}, Rest); 1441do_merge_pax(Header, [{?PAX_UID, Uid}|Rest]) -> 1442 Uid2 = binary_to_integer(Uid), 1443 do_merge_pax(Header#tar_header{uid=Uid2}, Rest); 1444do_merge_pax(Header, [{?PAX_GID, Gid}|Rest]) -> 1445 Gid2 = binary_to_integer(Gid), 1446 do_merge_pax(Header#tar_header{gid=Gid2}, Rest); 1447do_merge_pax(Header, [{?PAX_ATIME, Atime}|Rest]) -> 1448 Atime2 = parse_pax_time(Atime), 1449 do_merge_pax(Header#tar_header{atime=Atime2}, Rest); 1450do_merge_pax(Header, [{?PAX_MTIME, Mtime}|Rest]) -> 1451 Mtime2 = parse_pax_time(Mtime), 1452 do_merge_pax(Header#tar_header{mtime=Mtime2}, Rest); 1453do_merge_pax(Header, [{?PAX_CTIME, Ctime}|Rest]) -> 1454 Ctime2 = parse_pax_time(Ctime), 1455 do_merge_pax(Header#tar_header{ctime=Ctime2}, Rest); 1456do_merge_pax(Header, [{?PAX_SIZE, Size}|Rest]) -> 1457 Size2 = binary_to_integer(Size), 1458 do_merge_pax(Header#tar_header{size=Size2}, Rest); 1459do_merge_pax(Header, [{<<?PAX_XATTR_STR, _Key/binary>>, _Value}|Rest]) -> 1460 do_merge_pax(Header, Rest); 1461do_merge_pax(Header, [_Ignore|Rest]) -> 1462 do_merge_pax(Header, Rest). 1463 1464%% Returns the time since UNIX epoch as a datetime 1465-spec parse_pax_time(binary()) -> tar_time(). 1466parse_pax_time(Bin) when is_binary(Bin) -> 1467 TotalNano = case binary:split(Bin, [<<$.>>]) of 1468 [SecondsStr, NanoStr0] -> 1469 Seconds = binary_to_integer(SecondsStr), 1470 if byte_size(NanoStr0) < ?MAX_NANO_INT_SIZE -> 1471 %% right pad 1472 PaddingN = ?MAX_NANO_INT_SIZE-byte_size(NanoStr0), 1473 Padding = binary:copy(<<$0>>, PaddingN), 1474 NanoStr1 = <<NanoStr0/binary,Padding/binary>>, 1475 Nano = binary_to_integer(NanoStr1), 1476 (Seconds*?BILLION)+Nano; 1477 byte_size(NanoStr0) > ?MAX_NANO_INT_SIZE -> 1478 %% right truncate 1479 NanoStr1 = binary_part(NanoStr0, 0, ?MAX_NANO_INT_SIZE), 1480 Nano = binary_to_integer(NanoStr1), 1481 (Seconds*?BILLION)+Nano; 1482 true -> 1483 (Seconds*?BILLION)+binary_to_integer(NanoStr0) 1484 end; 1485 [SecondsStr] -> 1486 binary_to_integer(SecondsStr)*?BILLION 1487 end, 1488 %% truncate to microseconds 1489 Micro = TotalNano div 1000, 1490 Mega = Micro div 1000000000000, 1491 Secs = Micro div 1000000 - (Mega*1000000), 1492 Secs. 1493 1494%% Given a regular file reader, reads the whole file and 1495%% parses all extended attributes it contains. 1496parse_pax(#reg_file_reader{handle=Handle,num_bytes=0}) -> 1497 {#{}, Handle}; 1498parse_pax(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) -> 1499 case do_read(Handle0, NumBytes) of 1500 {ok, Bytes, Handle1} -> 1501 do_parse_pax(Handle1, Bytes, #{}); 1502 {error, _} = Err -> 1503 throw(Err) 1504 end. 1505 1506do_parse_pax(Reader, <<>>, Headers) -> 1507 {Headers, Reader}; 1508do_parse_pax(Reader, Bin, Headers) -> 1509 {Key, Value, Residual} = parse_pax_record(Bin), 1510 NewHeaders = maps:put(Key, Value, Headers), 1511 do_parse_pax(Reader, Residual, NewHeaders). 1512 1513%% Parse an extended attribute 1514parse_pax_record(Bin) when is_binary(Bin) -> 1515 case binary:split(Bin, [<<$\n>>]) of 1516 [Record, Residual] -> 1517 case binary:split(Record, [<<$\s>>], [trim_all]) of 1518 [_Len, Record1] -> 1519 case binary:split(Record1, [<<$=>>], [trim_all]) of 1520 [AttrName, AttrValue] -> 1521 {AttrName, AttrValue, Residual}; 1522 _Other -> 1523 throw({error, malformed_pax_record}) 1524 end; 1525 _Other -> 1526 throw({error, malformed_pax_record}) 1527 end; 1528 _Other -> 1529 throw({error, malformed_pax_record}) 1530 end. 1531 1532get_real_name(#reg_file_reader{handle=Handle,num_bytes=0}) -> 1533 {"", Handle}; 1534get_real_name(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) -> 1535 case do_read(Handle0, NumBytes) of 1536 {ok, RealName, Handle1} -> 1537 {RealName, Handle1}; 1538 {error, _} = Err -> 1539 throw(Err) 1540 end; 1541get_real_name(#sparse_file_reader{num_bytes=NumBytes}=Reader0) -> 1542 case do_read(Reader0, NumBytes) of 1543 {ok, RealName, Reader1} -> 1544 {RealName, Reader1}; 1545 {error, _} = Err -> 1546 throw(Err) 1547 end. 1548 1549%% Skip the remaining bytes for the current file entry 1550skip_file(#reg_file_reader{handle=Handle0,pos=Pos,size=Size}=Reader) -> 1551 Padding = skip_padding(Size), 1552 AbsPos = Handle0#reader.pos + (Size-Pos) + Padding, 1553 case do_position(Handle0, AbsPos) of 1554 {ok, _, Handle1} -> 1555 Reader#reg_file_reader{handle=Handle1,num_bytes=0,pos=Size}; 1556 Err -> 1557 throw(Err) 1558 end; 1559skip_file(#sparse_file_reader{pos=Pos,size=Size}=Reader) -> 1560 case do_read(Reader, Size-Pos) of 1561 {ok, _, Reader2} -> 1562 Reader2; 1563 Err -> 1564 throw(Err) 1565 end. 1566 1567skip_padding(0) -> 1568 0; 1569skip_padding(Size) when (Size rem ?BLOCK_SIZE) =:= 0 -> 1570 0; 1571skip_padding(Size) when Size =< ?BLOCK_SIZE -> 1572 ?BLOCK_SIZE - Size; 1573skip_padding(Size) -> 1574 ?BLOCK_SIZE - (Size rem ?BLOCK_SIZE). 1575 1576skip_unread(#reader{pos=Pos}=Reader0) when (Pos rem ?BLOCK_SIZE) > 0 -> 1577 Padding = skip_padding(Pos + ?BLOCK_SIZE), 1578 AbsPos = Pos + Padding, 1579 case do_position(Reader0, AbsPos) of 1580 {ok, _, Reader1} -> 1581 {ok, Reader1}; 1582 Err -> 1583 throw(Err) 1584 end; 1585skip_unread(#reader{}=Reader) -> 1586 {ok, Reader}; 1587skip_unread(#reg_file_reader{handle=Handle,num_bytes=0}) -> 1588 skip_unread(Handle); 1589skip_unread(#reg_file_reader{}=Reader) -> 1590 #reg_file_reader{handle=Handle} = skip_file(Reader), 1591 {ok, Handle}; 1592skip_unread(#sparse_file_reader{handle=Handle,num_bytes=0}) -> 1593 skip_unread(Handle); 1594skip_unread(#sparse_file_reader{}=Reader) -> 1595 #sparse_file_reader{handle=Handle} = skip_file(Reader), 1596 {ok, Handle}. 1597 1598write_extracted_element(#tar_header{name=Name,typeflag=Type}, 1599 Bin, 1600 #read_opts{output=memory}=Opts) -> 1601 case typeflag(Type) of 1602 regular -> 1603 read_verbose(Opts, "x ~ts~n", [Name]), 1604 {ok, {Name, Bin}}; 1605 _ -> 1606 ok 1607 end; 1608write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) -> 1609 Name1 = make_safe_path(Name0, Opts), 1610 Created = 1611 case typeflag(Header#tar_header.typeflag) of 1612 regular -> 1613 create_regular(Name1, Name0, Bin, Opts); 1614 directory -> 1615 read_verbose(Opts, "x ~ts~n", [Name0]), 1616 create_extracted_dir(Name1, Opts); 1617 symlink -> 1618 read_verbose(Opts, "x ~ts~n", [Name0]), 1619 LinkName = safe_link_name(Header, Opts), 1620 create_symlink(Name1, LinkName, Opts); 1621 Device when Device =:= char orelse Device =:= block -> 1622 %% char/block devices will be created as empty files 1623 %% and then have their major/minor device set later 1624 create_regular(Name1, Name0, <<>>, Opts); 1625 fifo -> 1626 %% fifo devices will be created as empty files 1627 create_regular(Name1, Name0, <<>>, Opts); 1628 Other -> % Ignore. 1629 read_verbose(Opts, "x ~ts - unsupported type ~p~n", 1630 [Name0, Other]), 1631 not_written 1632 end, 1633 case Created of 1634 ok -> set_extracted_file_info(Name1, Header); 1635 not_written -> ok 1636 end. 1637 1638make_safe_path([$/|Path], Opts) -> 1639 make_safe_path(Path, Opts); 1640make_safe_path(Path, #read_opts{cwd=Cwd}) -> 1641 case filename:safe_relative_path(Path) of 1642 unsafe -> 1643 throw({error,{Path,unsafe_path}}); 1644 SafePath -> 1645 filename:absname(SafePath, Cwd) 1646 end. 1647 1648safe_link_name(#tar_header{linkname=Path}, #read_opts{cwd=Cwd}) -> 1649 case safe_relative_path_links(Path, Cwd) of 1650 unsafe -> 1651 throw({error,{Path,unsafe_symlink}}); 1652 SafePath -> 1653 SafePath 1654 end. 1655 1656safe_relative_path_links(Path, Cwd) -> 1657 case filename:pathtype(Path) of 1658 relative -> safe_relative_path_links(filename:split(Path), Cwd, [], ""); 1659 _ -> unsafe 1660 end. 1661 1662safe_relative_path_links([Segment|Segments], Cwd, PrevSegments, Acc) -> 1663 AccSegment = join(Acc, Segment), 1664 case lists:member(AccSegment, PrevSegments) of 1665 true -> 1666 unsafe; 1667 false -> 1668 case file:read_link(join(Cwd, AccSegment)) of 1669 {ok, LinkPath} -> 1670 case filename:pathtype(LinkPath) of 1671 relative -> 1672 safe_relative_path_links(filename:split(LinkPath) ++ Segments, 1673 Cwd, [AccSegment|PrevSegments], Acc); 1674 _ -> 1675 unsafe 1676 end; 1677 1678 {error, _} -> 1679 case filename:safe_relative_path(join(Acc, Segment)) of 1680 unsafe -> 1681 unsafe; 1682 NewAcc -> 1683 safe_relative_path_links(Segments, Cwd, 1684 [AccSegment|PrevSegments], NewAcc) 1685 end 1686 end 1687 end; 1688safe_relative_path_links([], _Cwd, _PrevSegments, Acc) -> 1689 Acc. 1690 1691join([], Path) -> Path; 1692join(Left, Right) -> filename:join(Left, Right). 1693 1694create_regular(Name, NameInArchive, Bin, Opts) -> 1695 case write_extracted_file(Name, Bin, Opts) of 1696 not_written -> 1697 read_verbose(Opts, "x ~ts - exists, not created~n", [NameInArchive]), 1698 not_written; 1699 Ok -> 1700 read_verbose(Opts, "x ~ts~n", [NameInArchive]), 1701 Ok 1702 end. 1703 1704create_extracted_dir(Name, _Opts) -> 1705 case file:make_dir(Name) of 1706 ok -> ok; 1707 {error,enotsup} -> not_written; 1708 {error,eexist} -> not_written; 1709 {error,enoent} -> make_dirs(Name, dir); 1710 {error,Reason} -> throw({error, Reason}) 1711 end. 1712 1713create_symlink(Name, Linkname, Opts) -> 1714 case file:make_symlink(Linkname, Name) of 1715 ok -> ok; 1716 {error,enoent} -> 1717 ok = make_dirs(Name, file), 1718 create_symlink(Name, Linkname, Opts); 1719 {error,eexist} -> not_written; 1720 {error,enotsup} -> 1721 read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]), 1722 not_written; 1723 {error,Reason} -> throw({error, Reason}) 1724 end. 1725 1726write_extracted_file(Name, Bin, Opts) -> 1727 Write = 1728 case Opts#read_opts.keep_old_files of 1729 true -> 1730 case file:read_file_info(Name) of 1731 {ok, _} -> false; 1732 _ -> true 1733 end; 1734 false -> true 1735 end, 1736 case Write of 1737 true -> write_file(Name, Bin); 1738 false -> not_written 1739 end. 1740 1741write_file(Name, Bin) -> 1742 case file:write_file(Name, Bin) of 1743 ok -> ok; 1744 {error,enoent} -> 1745 case make_dirs(Name, file) of 1746 ok -> 1747 write_file(Name, Bin); 1748 {error,Reason} -> 1749 throw({error, Reason}) 1750 end; 1751 {error,Reason} -> 1752 throw({error, Reason}) 1753 end. 1754 1755set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_SYMLINK}) -> ok; 1756set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_LINK}) -> ok; 1757set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_CHAR}=Header) -> 1758 set_device_info(Name, Header); 1759set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_BLOCK}=Header) -> 1760 set_device_info(Name, Header); 1761set_extracted_file_info(Name, #tar_header{mtime=Mtime,mode=Mode}) -> 1762 Info = #file_info{mode=Mode, mtime=Mtime}, 1763 file:write_file_info(Name, Info, [{time, posix}]). 1764 1765set_device_info(Name, #tar_header{}=Header) -> 1766 Mtime = Header#tar_header.mtime, 1767 Mode = Header#tar_header.mode, 1768 Devmajor = Header#tar_header.devmajor, 1769 Devminor = Header#tar_header.devminor, 1770 Info = #file_info{ 1771 mode=Mode, 1772 mtime=Mtime, 1773 major_device=Devmajor, 1774 minor_device=Devminor 1775 }, 1776 file:write_file_info(Name, Info). 1777 1778%% Makes all directories leading up to the file. 1779 1780make_dirs(Name, file) -> 1781 filelib:ensure_dir(Name); 1782make_dirs(Name, dir) -> 1783 filelib:ensure_dir(filename:join(Name,"*")). 1784 1785%% Prints the message on if the verbose option is given (for reading). 1786read_verbose(#read_opts{verbose=true}, Format, Args) -> 1787 io:format(Format, Args); 1788read_verbose(_, _, _) -> 1789 ok. 1790 1791%% Prints the message on if the verbose option is given. 1792add_verbose(#add_opts{verbose=true}, Format, Args) -> 1793 io:format(Format, Args); 1794add_verbose(_, _, _) -> 1795 ok. 1796 1797%%%%%%%%%%%%%%%%%% 1798%% I/O primitives 1799%%%%%%%%%%%%%%%%%% 1800 1801do_write(#reader{handle=Handle,func=Fun}=Reader0, Data) 1802 when is_function(Fun,2) -> 1803 case Fun(write,{Handle,Data}) of 1804 ok -> 1805 {ok, Pos, Reader1} = do_position(Reader0, {cur,0}), 1806 {ok, Reader1#reader{pos=Pos}}; 1807 {error, _} = Err -> 1808 Err 1809 end. 1810 1811do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=0}=Opts) 1812 when is_function(Fun, 2) -> 1813 do_copy(Reader, Source, Opts#add_opts{chunk_size=65536}); 1814do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=ChunkSize}) 1815 when is_function(Fun, 2) -> 1816 case file:open(Source, [read, binary]) of 1817 {ok, SourceFd} -> 1818 case copy_chunked(Reader, SourceFd, ChunkSize, 0) of 1819 {ok, _Copied, _Reader2} = Ok-> 1820 _ = file:close(SourceFd), 1821 Ok; 1822 Err -> 1823 _ = file:close(SourceFd), 1824 throw(Err) 1825 end; 1826 Err -> 1827 throw(Err) 1828 end. 1829 1830copy_chunked(#reader{}=Reader, Source, ChunkSize, Copied) -> 1831 case file:read(Source, ChunkSize) of 1832 {ok, Bin} -> 1833 {ok, Reader2} = do_write(Reader, Bin), 1834 copy_chunked(Reader2, Source, ChunkSize, Copied+byte_size(Bin)); 1835 eof -> 1836 {ok, Copied, Reader}; 1837 Other -> 1838 Other 1839 end. 1840 1841 1842do_position(#reader{handle=Handle,func=Fun}=Reader, Pos) 1843 when is_function(Fun,2)-> 1844 case Fun(position, {Handle,Pos}) of 1845 {ok, NewPos} -> 1846 %% since Pos may not always be an absolute seek, 1847 %% make sure we update the reader with the new absolute position 1848 {ok, AbsPos} = Fun(position, {Handle, {cur, 0}}), 1849 {ok, NewPos, Reader#reader{pos=AbsPos}}; 1850 Other -> 1851 Other 1852 end. 1853 1854do_read(#reg_file_reader{handle=Handle,pos=Pos,size=Size}=Reader, Len) -> 1855 NumBytes = Size - Pos, 1856 ActualLen = if NumBytes - Len < 0 -> NumBytes; true -> Len end, 1857 case do_read(Handle, ActualLen) of 1858 {ok, Bin, Handle2} -> 1859 NewPos = Pos + ActualLen, 1860 NumBytes2 = Size - NewPos, 1861 Reader1 = Reader#reg_file_reader{ 1862 handle=Handle2, 1863 pos=NewPos, 1864 num_bytes=NumBytes2}, 1865 {ok, Bin, Reader1}; 1866 Other -> 1867 Other 1868 end; 1869do_read(#sparse_file_reader{}=Reader, Len) -> 1870 do_sparse_read(Reader, Len); 1871do_read(#reader{pos=Pos,handle=Handle,func=Fun}=Reader, Len) 1872 when is_function(Fun,2)-> 1873 %% Always convert to binary internally 1874 case Fun(read2,{Handle,Len}) of 1875 {ok, List} when is_list(List) -> 1876 Bin = list_to_binary(List), 1877 NewPos = Pos+byte_size(Bin), 1878 {ok, Bin, Reader#reader{pos=NewPos}}; 1879 {ok, Bin} when is_binary(Bin) -> 1880 NewPos = Pos+byte_size(Bin), 1881 {ok, Bin, Reader#reader{pos=NewPos}}; 1882 Other -> 1883 Other 1884 end. 1885 1886 1887do_sparse_read(Reader, Len) -> 1888 do_sparse_read(Reader, Len, <<>>). 1889 1890do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{num_bytes=0}|Entries] 1891 }=Reader0, Len, Acc) -> 1892 %% skip all empty fragments 1893 Reader1 = Reader0#sparse_file_reader{sparse_map=Entries}, 1894 do_sparse_read(Reader1, Len, Acc); 1895do_sparse_read(#sparse_file_reader{sparse_map=[], 1896 pos=Pos,size=Size}=Reader0, Len, Acc) 1897 when Pos < Size -> 1898 %% if there are no more fragments, it is possible that there is one last sparse hole 1899 %% this behaviour matches the BSD tar utility 1900 %% however, GNU tar stops returning data even if we haven't reached the end 1901 {ok, Bin, Reader1} = read_sparse_hole(Reader0, Size, Len), 1902 do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>); 1903do_sparse_read(#sparse_file_reader{sparse_map=[]}=Reader, _Len, Acc) -> 1904 {ok, Acc, Reader}; 1905do_sparse_read(#sparse_file_reader{}=Reader, 0, Acc) -> 1906 {ok, Acc, Reader}; 1907do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{offset=Offset}|_], 1908 pos=Pos}=Reader0, Len, Acc) 1909 when Pos < Offset -> 1910 {ok, Bin, Reader1} = read_sparse_hole(Reader0, Offset, Offset-Pos), 1911 do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>); 1912do_sparse_read(#sparse_file_reader{sparse_map=[Entry|Entries], 1913 pos=Pos}=Reader0, Len, Acc) -> 1914 %% we're in a data fragment, so read from it 1915 %% end offset of fragment 1916 EndPos = Entry#sparse_entry.offset + Entry#sparse_entry.num_bytes, 1917 %% bytes left in fragment 1918 NumBytes = EndPos - Pos, 1919 ActualLen = if Len > NumBytes -> NumBytes; true -> Len end, 1920 case do_read(Reader0#sparse_file_reader.handle, ActualLen) of 1921 {ok, Bin, Handle} -> 1922 BytesRead = byte_size(Bin), 1923 ActualEndPos = Pos+BytesRead, 1924 Reader1 = if ActualEndPos =:= EndPos -> 1925 Reader0#sparse_file_reader{sparse_map=Entries}; 1926 true -> 1927 Reader0 1928 end, 1929 Size = Reader1#sparse_file_reader.size, 1930 NumBytes2 = Size - ActualEndPos, 1931 Reader2 = Reader1#sparse_file_reader{ 1932 handle=Handle, 1933 pos=ActualEndPos, 1934 num_bytes=NumBytes2}, 1935 do_sparse_read(Reader2, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>); 1936 Other -> 1937 Other 1938 end. 1939 1940%% Reads a sparse hole ending at Offset 1941read_sparse_hole(#sparse_file_reader{pos=Pos}=Reader, Offset, Len) -> 1942 N = Offset - Pos, 1943 N2 = if N > Len -> 1944 Len; 1945 true -> 1946 N 1947 end, 1948 Bin = <<0:N2/unit:8>>, 1949 NumBytes = Reader#sparse_file_reader.size - (Pos+N2), 1950 {ok, Bin, Reader#sparse_file_reader{ 1951 num_bytes=NumBytes, 1952 pos=Pos+N2}}. 1953 1954-spec do_close(tar_descriptor()) -> ok | {error, term()}. 1955do_close(#reader{handle=Handle,func=Fun}) when is_function(Fun,2) -> 1956 Fun(close,Handle). 1957 1958%%%%%%%%%%%%%%%%%% 1959%% Option parsing 1960%%%%%%%%%%%%%%%%%% 1961 1962extract_opts(List) -> 1963 extract_opts(List, default_options()). 1964 1965table_opts(List) -> 1966 read_opts(List, default_options()). 1967 1968default_options() -> 1969 {ok, Cwd} = file:get_cwd(), 1970 #read_opts{cwd=Cwd}. 1971 1972extract_opts([keep_old_files|Rest], Opts) -> 1973 extract_opts(Rest, Opts#read_opts{keep_old_files=true}); 1974extract_opts([{cwd, Cwd}|Rest], Opts) -> 1975 extract_opts(Rest, Opts#read_opts{cwd=Cwd}); 1976extract_opts([{files, Files}|Rest], Opts) -> 1977 Set = ordsets:from_list(Files), 1978 extract_opts(Rest, Opts#read_opts{files=Set}); 1979extract_opts([memory|Rest], Opts) -> 1980 extract_opts(Rest, Opts#read_opts{output=memory}); 1981extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) -> 1982 extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]}); 1983extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> 1984 extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); 1985extract_opts([verbose|Rest], Opts) -> 1986 extract_opts(Rest, Opts#read_opts{verbose=true}); 1987extract_opts([Other|Rest], Opts) -> 1988 extract_opts(Rest, read_opts([Other], Opts)); 1989extract_opts([], Opts) -> 1990 Opts. 1991 1992read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) -> 1993 read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]}); 1994read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> 1995 read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); 1996read_opts([verbose|Rest], Opts) -> 1997 read_opts(Rest, Opts#read_opts{verbose=true}); 1998read_opts([_|Rest], Opts) -> 1999 read_opts(Rest, Opts); 2000read_opts([], Opts) -> 2001 Opts. 2002