1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 2000-2017. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20-module(file_io_server). 21 22%% A simple file server for io to one file instance per server instance. 23 24-export([format_error/1]). 25-export([start/3, start_link/3]). 26 27-export([count_and_find/3]). 28 29-record(state, {handle,owner,mref,buf,read_mode,unic}). 30 31-include("file_int.hrl"). 32 33-define(READ_SIZE_LIST, 128). 34-define(READ_SIZE_BINARY, (8*1024)). 35 36-define(eat_message(M, T), receive M -> M after T -> timeout end). 37 38%%%----------------------------------------------------------------- 39%%% Exported functions 40 41format_error({_Line, ?MODULE, Reason}) -> 42 io_lib:format("~w", [Reason]); 43format_error({_Line, Mod, Reason}) -> 44 Mod:format_error(Reason); 45format_error(invalid_unicode) -> 46 io_lib:format("cannot translate from UTF-8", []); 47format_error(ErrorId) -> 48 erl_posix_msg:message(ErrorId). 49 50start(Owner, FileName, ModeList) 51 when is_pid(Owner), (is_list(FileName) orelse is_binary(FileName)), is_list(ModeList) -> 52 do_start(spawn, Owner, FileName, ModeList). 53 54start_link(Owner, FileName, ModeList) 55 when is_pid(Owner), (is_list(FileName) orelse is_binary(FileName)), is_list(ModeList) -> 56 do_start(spawn_link, Owner, FileName, ModeList). 57 58%%%----------------------------------------------------------------- 59%%% Server starter, dispatcher and helpers 60 61do_start(Spawn, Owner, FileName, ModeList) -> 62 Self = self(), 63 Ref = make_ref(), 64 Utag = erlang:dt_spread_tag(true), 65 Pid = 66 erlang:Spawn( 67 fun() -> 68 erlang:dt_restore_tag(Utag), 69 %% process_flag(trap_exit, true), 70 case parse_options(ModeList) of 71 {ReadMode, UnicodeMode, Opts0} -> 72 Opts = maybe_add_read_ahead(ReadMode, Opts0), 73 case raw_file_io:open(FileName, [raw | Opts]) of 74 {error, Reason} = Error -> 75 Self ! {Ref, Error}, 76 exit(Reason); 77 {ok, Handle} -> 78 %% XXX must I handle R6 nodes here? 79 M = erlang:monitor(process, Owner), 80 Self ! {Ref, ok}, 81 server_loop( 82 #state{handle = Handle, 83 owner = Owner, 84 mref = M, 85 buf = <<>>, 86 read_mode = ReadMode, 87 unic = UnicodeMode}) 88 end; 89 {error,Reason1} = Error1 -> 90 Self ! {Ref, Error1}, 91 exit(Reason1) 92 end 93 end), 94 erlang:dt_restore_tag(Utag), 95 Mref = erlang:monitor(process, Pid), 96 receive 97 {Ref, {error, _Reason} = Error} -> 98 erlang:demonitor(Mref, [flush]), 99 Error; 100 {Ref, ok} -> 101 erlang:demonitor(Mref), 102 receive 103 {'DOWN', Mref, _, _, Reason} -> 104 {error, Reason} 105 after 0 -> 106 {ok, Pid} 107 end; 108 {'DOWN', Mref, _, _, Reason} -> 109 {error, Reason} 110 end. 111 112%%% Returns {ReadMode, UnicodeMode, RealOpts} 113parse_options(List) -> 114 parse_options(expand_encoding(List), list, latin1, []). 115 116parse_options([], list, Uni, Acc) -> 117 {list,Uni,[binary|lists:reverse(Acc)]}; 118parse_options([], binary, Uni, Acc) -> 119 {binary,Uni,lists:reverse(Acc)}; 120parse_options([{encoding, Encoding}|T], RMode, _, Acc) -> 121 case valid_enc(Encoding) of 122 {ok, ExpandedEnc} -> 123 parse_options(T, RMode, ExpandedEnc, Acc); 124 {error,_Reason} = Error -> 125 Error 126 end; 127parse_options([binary|T], _, Uni, Acc) -> 128 parse_options(T, binary, Uni, [binary|Acc]); 129parse_options([H|T], R, U, Acc) -> 130 parse_options(T, R, U, [H|Acc]). 131 132expand_encoding([]) -> 133 []; 134expand_encoding([latin1 | T]) -> 135 [{encoding,latin1} | expand_encoding(T)]; 136expand_encoding([unicode | T]) -> 137 [{encoding,unicode} | expand_encoding(T)]; 138expand_encoding([H|T]) -> 139 [H|expand_encoding(T)]. 140 141valid_enc(latin1) -> 142 {ok,latin1}; 143valid_enc(utf8) -> 144 {ok,unicode}; 145valid_enc(unicode) -> 146 {ok,unicode}; 147valid_enc(utf16) -> 148 {ok,{utf16,big}}; 149valid_enc({utf16,big}) -> 150 {ok,{utf16,big}}; 151valid_enc({utf16,little}) -> 152 {ok,{utf16,little}}; 153valid_enc(utf32) -> 154 {ok,{utf32,big}}; 155valid_enc({utf32,big}) -> 156 {ok,{utf32,big}}; 157valid_enc({utf32,little}) -> 158 {ok,{utf32,little}}; 159valid_enc(_Other) -> 160 {error,badarg}. 161 162%% Add a small read_ahead buffer if the file is opened for reading 163%% only in list mode and no read_ahead is already given. 164maybe_add_read_ahead(binary, Opts) -> 165 Opts; 166maybe_add_read_ahead(list, Opts) -> 167 P = fun(read_ahead) -> true; 168 ({read_ahead,_}) -> true; 169 (append) -> true; 170 (exclusive) -> true; 171 (write) -> true; 172 (_) -> false 173 end, 174 case lists:any(P, Opts) of 175 false -> 176 [{read_ahead, 4096}|Opts]; 177 true -> 178 Opts 179 end. 180 181server_loop(#state{mref = Mref} = State) -> 182 receive 183 {file_request, From, ReplyAs, Request} when is_pid(From) -> 184 case file_request(Request, State) of 185 {reply, Reply, NewState} -> 186 _ = file_reply(From, ReplyAs, Reply), 187 server_loop(NewState); 188 {error, Reply, NewState} -> 189 %% error is the same as reply, except that 190 %% it breaks the io_request_loop further down 191 _ = file_reply(From, ReplyAs, Reply), 192 server_loop(NewState); 193 {stop, Reason, Reply, _NewState} -> 194 _ = file_reply(From, ReplyAs, Reply), 195 exit(Reason) 196 end; 197 {io_request, From, ReplyAs, Request} when is_pid(From) -> 198 case io_request(Request, State) of 199 {reply, Reply, NewState} -> 200 _ = io_reply(From, ReplyAs, Reply), 201 server_loop(NewState); 202 {error, Reply, NewState} -> 203 %% error is the same as reply, except that 204 %% it breaks the io_request_loop further down 205 _ = io_reply(From, ReplyAs, Reply), 206 server_loop(NewState); 207 {stop, Reason, Reply, _NewState} -> 208 _ = io_reply(From, ReplyAs, Reply), 209 exit(Reason) 210 end; 211 {'DOWN', Mref, _, _, Reason} -> 212 exit(Reason); 213 _ -> 214 server_loop(State) 215 end. 216 217file_reply(From, ReplyAs, Reply) -> 218 From ! {file_reply, ReplyAs, Reply}. 219 220io_reply(From, ReplyAs, Reply) -> 221 From ! {io_reply, ReplyAs, Reply}. 222 223%%%----------------------------------------------------------------- 224%%% file requests 225 226file_request({advise,Offset,Length,Advise}, 227 #state{handle=Handle}=State) -> 228 case ?CALL_FD(Handle, advise, [Offset, Length, Advise]) of 229 {error,Reason}=Reply -> 230 {stop,Reason,Reply,State}; 231 Reply -> 232 {reply,Reply,State} 233 end; 234file_request({allocate, Offset, Length}, 235 #state{handle = Handle} = State) -> 236 Reply = ?CALL_FD(Handle, allocate, [Offset, Length]), 237 {reply, Reply, State}; 238file_request({pread,At,Sz}, State) 239 when At =:= cur; 240 At =:= {cur,0} -> 241 case get_chars(Sz, latin1, State) of 242 {reply,Reply,NewState} 243 when is_list(Reply); 244 is_binary(Reply) -> 245 {reply,{ok,Reply},NewState}; 246 Other -> 247 Other 248 end; 249file_request({pread,At,Sz}, 250 #state{handle=Handle,buf=Buf}=State) -> 251 case position(Handle, At, Buf) of 252 {error,_} = Reply -> 253 {error,Reply,State}; 254 _ -> 255 case get_chars(Sz, latin1, State#state{buf= <<>>}) of 256 {reply,Reply,NewState} 257 when is_list(Reply); 258 is_binary(Reply) -> 259 {reply,{ok,Reply},NewState}; 260 Other -> 261 Other 262 end 263 end; 264file_request({pwrite,At,Data}, 265 #state{buf= <<>>}=State) 266 when At =:= cur; 267 At =:= {cur,0} -> 268 put_chars(Data, latin1, State); 269file_request({pwrite,At,Data}, 270 #state{handle=Handle,buf=Buf}=State) -> 271 case position(Handle, At, Buf) of 272 {error,_} = Reply -> 273 {error,Reply,State}; 274 _ -> 275 put_chars(Data, latin1, State) 276 end; 277file_request(datasync, 278 #state{handle=Handle}=State) -> 279 case ?CALL_FD(Handle, datasync, []) of 280 {error,Reason}=Reply -> 281 {stop,Reason,Reply,State}; 282 Reply -> 283 {reply,Reply,State} 284 end; 285file_request(sync, 286 #state{handle=Handle}=State) -> 287 case ?CALL_FD(Handle, sync, []) of 288 {error,Reason}=Reply -> 289 {stop,Reason,Reply,State}; 290 Reply -> 291 {reply,Reply,State} 292 end; 293file_request(close, 294 #state{handle=Handle}=State) -> 295 case ?CALL_FD(Handle, close, []) of 296 {error,Reason}=Reply -> 297 {stop,Reason,Reply,State#state{buf= <<>>}}; 298 Reply -> 299 {stop,normal,Reply,State#state{buf= <<>>}} 300 end; 301file_request({position,At}, 302 #state{handle=Handle,buf=Buf}=State) -> 303 case position(Handle, At, Buf) of 304 {error,_} = Reply -> 305 {error,Reply,State}; 306 Reply -> 307 std_reply(Reply, State) 308 end; 309file_request(truncate, 310 #state{handle=Handle}=State) -> 311 case ?CALL_FD(Handle, truncate, []) of 312 {error,Reason}=Reply -> 313 {stop,Reason,Reply,State#state{buf= <<>>}}; 314 Reply -> 315 std_reply(Reply, State) 316 end; 317file_request({read_handle_info, Opts}, 318 #state{handle=Handle}=State) -> 319 case ?CALL_FD(Handle, read_handle_info, [Opts]) of 320 {error,Reason}=Reply -> 321 {stop,Reason,Reply,State}; 322 Reply -> 323 {reply,Reply,State} 324 end; 325file_request(Unknown, 326 #state{}=State) -> 327 Reason = {request, Unknown}, 328 {error,{error,Reason},State}. 329 330%% Standard reply and clear buffer 331std_reply({error,_}=Reply, State) -> 332 {error,Reply,State#state{buf= <<>>}}; 333std_reply(Reply, State) -> 334 {reply,Reply,State#state{buf= <<>>}}. 335 336%%%----------------------------------------------------------------- 337%%% I/O request 338 339%% New protocol with encoding tags (R13) 340io_request({put_chars, Enc, Chars}, 341 #state{buf= <<>>}=State) -> 342 put_chars(Chars, Enc, State); 343io_request({put_chars, Enc, Chars}, 344 #state{handle=Handle,buf=Buf}=State) -> 345 case position(Handle, cur, Buf) of 346 {error,Reason}=Reply -> 347 {stop,Reason,Reply,State}; 348 _ -> 349 put_chars(Chars, Enc, State#state{buf= <<>>}) 350 end; 351io_request({put_chars,Enc,Mod,Func,Args}, 352 #state{}=State) -> 353 case catch apply(Mod, Func, Args) of 354 Chars when is_list(Chars); is_binary(Chars) -> 355 io_request({put_chars,Enc,Chars}, State); 356 _ -> 357 {error,{error,Func},State} 358 end; 359 360 361io_request({get_until,Enc,_Prompt,Mod,Func,XtraArgs}, 362 #state{}=State) -> 363 get_chars(io_lib, get_until, {Mod, Func, XtraArgs}, Enc, State); 364io_request({get_chars,Enc,_Prompt,N}, 365 #state{}=State) -> 366 get_chars(N, Enc, State); 367 368io_request({get_line,OutEnc,_Prompt}, #state{buf=Buf, read_mode=Mode, unic=InEnc} = State0) -> 369 try 370 %% Minimize the encoding conversions 371 WorkEnc = case InEnc of 372 {_,_} -> OutEnc; %% utf16 or utf32 373 _ -> InEnc %% Byte oriented utf8 or latin1 374 end, 375 {Res, State} = get_line(start, convert_enc(Buf, InEnc, WorkEnc), WorkEnc, State0), 376 {reply, cast(Res, Mode, WorkEnc, OutEnc), State} 377 catch exit:ExError -> 378 {stop,ExError,{error,ExError},State0#state{buf= <<>>}} 379 end; 380 381io_request({setopts, Opts}, 382 #state{}=State) when is_list(Opts) -> 383 setopts(Opts, State); 384 385io_request(getopts, 386 #state{}=State) -> 387 getopts(State); 388 389%% BC with pre-R13 nodes 390io_request({put_chars, Chars},#state{}=State) -> 391 io_request({put_chars, latin1, Chars},State); 392io_request({put_chars,Mod,Func,Args}, #state{}=State) -> 393 io_request({put_chars,latin1,Mod,Func,Args}, State); 394io_request({get_until,_Prompt,Mod,Func,XtraArgs}, #state{}=State) -> 395 io_request({get_until,latin1,_Prompt,Mod,Func,XtraArgs}, State); 396io_request({get_chars,_Prompt,N}, #state{}=State) -> 397 io_request({get_chars,latin1,_Prompt,N}, State); 398io_request({get_line,_Prompt}, #state{}=State) -> 399 io_request({get_line,latin1,_Prompt}, State); 400 401io_request({requests,Requests}, 402 #state{}=State) when is_list(Requests) -> 403 io_request_loop(Requests, {reply,ok,State}); 404io_request(Unknown, 405 #state{}=State) -> 406 Reason = {request,Unknown}, 407 {error,{error,Reason},State}. 408 409 410%% Process a list of requests as long as the results are ok. 411 412io_request_loop([], Result) -> 413 Result; 414io_request_loop([_Request|_Tail], 415 {stop,_Reason,_Reply,_State}=Result) -> 416 Result; 417io_request_loop([_Request|_Tail], 418 {error,_Reply,_State}=Result) -> 419 Result; 420io_request_loop([Request|Tail], 421 {reply,_Reply,State}) -> 422 io_request_loop(Tail, io_request(Request, State)). 423 424 425%% I/O request put_chars 426%% 427put_chars(Chars, latin1, #state{handle=Handle, unic=latin1}=State) -> 428 NewState = State#state{buf = <<>>}, 429 case ?CALL_FD(Handle, write, [Chars]) of 430 {error,Reason}=Reply -> 431 {stop,Reason,Reply,NewState}; 432 Reply -> 433 {reply,Reply,NewState} 434 end; 435put_chars(Chars, InEncoding, #state{handle=Handle, unic=OutEncoding}=State) -> 436 NewState = State#state{buf = <<>>}, 437 case unicode:characters_to_binary(Chars,InEncoding,OutEncoding) of 438 Bin when is_binary(Bin) -> 439 case ?CALL_FD(Handle, write, [Bin]) of 440 {error,Reason}=Reply -> 441 {stop,Reason,Reply,NewState}; 442 Reply -> 443 {reply,Reply,NewState} 444 end; 445 {error,_,_} -> 446 {stop,no_translation, 447 {error,{no_translation, InEncoding, OutEncoding}}, 448 NewState} 449 end. 450 451get_line(S, {<<>>, Cont}, OutEnc, 452 #state{handle=Handle, read_mode=Mode, unic=InEnc}=State) -> 453 case ?CALL_FD(Handle, read, [read_size(Mode)]) of 454 {ok,Bin} -> 455 get_line(S, convert_enc([Cont, Bin], InEnc, OutEnc), OutEnc, State); 456 eof -> 457 get_line(S, {eof, Cont}, OutEnc, State); 458 {error,Reason}=Error -> 459 {stop,Reason,Error,State} 460 end; 461get_line(S0, {Buf, BCont}, OutEnc, #state{unic=InEnc}=State) -> 462 case io_lib:collect_line(S0, Buf, OutEnc, []) of 463 {stop, Result, Cont0} -> 464 %% Convert both buffers back to file InEnc encoding 465 {Cont, <<>>} = convert_enc(Cont0, OutEnc, InEnc), 466 {Result, State#state{buf=cast_binary([Cont, BCont])}}; 467 S -> 468 get_line(S, {<<>>, BCont}, OutEnc, State) 469 end. 470 471convert_enc(Bins, Enc, Enc) -> 472 {cast_binary(Bins), <<>>}; 473convert_enc(eof, _, _) -> 474 {<<>>, <<>>}; 475convert_enc(Bin, InEnc, OutEnc) -> 476 case unicode:characters_to_binary(Bin, InEnc, OutEnc) of 477 Res when is_binary(Res) -> 478 {Res, <<>>}; 479 {incomplete, Res, Cont} -> 480 {Res, Cont}; 481 {error, _, _} -> 482 exit({no_translation, InEnc, OutEnc}) 483 end. 484 485%% 486%% Process the I/O request get_chars 487%% 488get_chars(0, Enc, #state{read_mode=ReadMode,unic=InEncoding}=State) -> 489 {reply,cast(<<>>, ReadMode,InEncoding, Enc),State}; 490get_chars(N, Enc, #state{buf=Buf,read_mode=ReadMode,unic=latin1}=State) 491 when is_integer(N), N > 0, N =< byte_size(Buf) -> 492 {B1,B2} = split_binary(Buf, N), 493 {reply,cast(B1, ReadMode,latin1,Enc),State#state{buf=B2}}; 494get_chars(N, Enc, #state{buf=Buf,read_mode=ReadMode,unic=latin1}=State) 495 when is_integer(N), N > 0, N =< byte_size(Buf) -> 496 {B1,B2} = split_binary(Buf, N), 497 {reply,cast(B1, ReadMode,latin1,Enc),State#state{buf=B2}}; 498get_chars(N, OutEnc,#state{handle=Handle,buf=Buf,read_mode=ReadMode,unic=latin1}=State) 499 when is_integer(N), N > 0 -> 500 BufSize = byte_size(Buf), 501 NeedSize = N-BufSize, 502 Size = erlang:max(NeedSize, ?READ_SIZE_BINARY), 503 case ?CALL_FD(Handle, read, [Size]) of 504 {ok, B} -> 505 if BufSize+byte_size(B) < N -> 506 std_reply(cat(Buf, B, ReadMode,latin1,OutEnc), State); 507 true -> 508 {B1,B2} = split_binary(B, NeedSize), 509 {reply,cat(Buf, B1, ReadMode, latin1,OutEnc),State#state{buf=B2}} 510 end; 511 eof when BufSize =:= 0 -> 512 {reply,eof,State}; 513 eof -> 514 std_reply(cast(Buf, ReadMode,latin1,OutEnc), State); 515 {error,Reason}=Error -> 516 {stop,Reason,Error,State#state{buf= <<>>}} 517 end; 518get_chars(N, OutEnc,#state{handle=Handle,buf=Buf,read_mode=ReadMode,unic=InEncoding}=State) 519 when is_integer(N), N > 0 -> 520 try 521 %% This is rather tricky, we need to count the actual number of characters 522 %% in the buffer first as unicode characters are not constant in length 523 {BufCount, SplitPos} = count_and_find(Buf,N,InEncoding), 524 case BufCount >= N of 525 true -> 526 {B1,B2} = case SplitPos of 527 none -> {Buf,<<>>}; 528 _ ->split_binary(Buf,SplitPos) 529 end, 530 {reply,cast(B1, ReadMode,InEncoding,OutEnc),State#state{buf=B2}}; 531 false -> 532 %% Need more, Try to read 4*needed in bytes... 533 NeedSize = (N - BufCount) * 4, 534 Size = erlang:max(NeedSize, ?READ_SIZE_BINARY), 535 case ?CALL_FD(Handle, read, [Size]) of 536 {ok, B} -> 537 NewBuf = list_to_binary([Buf,B]), 538 {NewCount,NewSplit} = count_and_find(NewBuf,N,InEncoding), 539 case NewCount >= N of 540 true -> 541 {B01,B02} = case NewSplit of 542 none -> {NewBuf,<<>>}; 543 _ ->split_binary(NewBuf, NewSplit) 544 end, 545 {reply,cast(B01, ReadMode,InEncoding,OutEnc), 546 State#state{buf=B02}}; 547 false -> 548 %% Reached end of file 549 std_reply(cast(NewBuf, ReadMode,InEncoding,OutEnc), 550 State#state{buf = <<>>}) 551 end; 552 eof when BufCount =:= 0 -> 553 {reply,eof,State}; 554 eof -> 555 std_reply(cast(Buf, ReadMode,InEncoding,OutEnc), State#state{buf = <<>>}); 556 {error,Reason}=Error -> 557 {stop,Reason,Error,State#state{buf = <<>>}} 558 end 559 end 560 catch 561 exit:ExError -> 562 {stop,ExError,{error,ExError},State#state{buf= <<>>}} 563 end; 564 565get_chars(_N, _, #state{}=State) -> 566 {error,{error,get_chars},State}. 567 568get_chars(Mod, Func, XtraArg, OutEnc, #state{buf= <<>>}=State) -> 569 get_chars_empty(Mod, Func, XtraArg, start, OutEnc, State); 570get_chars(Mod, Func, XtraArg, OutEnc, #state{buf=Buf}=State) -> 571 get_chars_apply(Mod, Func, XtraArg, start, OutEnc, State#state{buf= <<>>}, Buf). 572 573get_chars_empty(Mod, Func, XtraArg, S, latin1, 574 #state{handle=Handle,read_mode=ReadMode, unic=latin1}=State) -> 575 case ?CALL_FD(Handle, read, [read_size(ReadMode)]) of 576 {ok,Bin} -> 577 get_chars_apply(Mod, Func, XtraArg, S, latin1, State, Bin); 578 eof -> 579 get_chars_apply(Mod, Func, XtraArg, S, latin1, State, eof); 580 {error,Reason}=Error -> 581 {stop,Reason,Error,State} 582 end; 583get_chars_empty(Mod, Func, XtraArg, S, OutEnc, 584 #state{handle=Handle,read_mode=ReadMode}=State) -> 585 case ?CALL_FD(Handle, read, [read_size(ReadMode)]) of 586 {ok,Bin} -> 587 get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, Bin); 588 eof -> 589 get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, eof); 590 {error,Reason}=Error -> 591 {stop,Reason,Error,State} 592 end. 593get_chars_notempty(Mod, Func, XtraArg, S, OutEnc, 594 #state{handle=Handle,read_mode=ReadMode,buf = B}=State) -> 595 case ?CALL_FD(Handle, read, [read_size(ReadMode)]) of 596 {ok,Bin} -> 597 get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, list_to_binary([B,Bin])); 598 eof -> 599 case B of 600 <<>> -> 601 get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, eof); 602 _ -> 603 {stop,invalid_unicode,invalid_unicode_error(Mod, Func, XtraArg, S),State} 604 end; 605 {error,Reason}=Error -> 606 {stop,Reason,Error,State} 607 end. 608 609 610get_chars_apply(Mod, Func, XtraArg, S0, latin1, 611 #state{read_mode=ReadMode,unic=latin1}=State, Data0) -> 612 Data1 = case ReadMode of 613 list when is_binary(Data0) -> binary_to_list(Data0); 614 _ -> Data0 615 end, 616 case catch Mod:Func(S0, Data1, latin1, XtraArg) of 617 {stop,Result,Buf} -> 618 {reply,Result,State#state{buf=cast_binary(Buf)}}; 619 {'EXIT',Reason} -> 620 {stop,Reason,{error,err_func(Mod, Func, XtraArg)},State}; 621 S1 -> 622 get_chars_empty(Mod, Func, XtraArg, S1, latin1, State) 623 end; 624get_chars_apply(Mod, Func, XtraArg, S0, OutEnc, 625 #state{read_mode=ReadMode,unic=InEnc}=State, Data0) -> 626 try 627 {Data1,NewBuff} = case ReadMode of 628 list when is_binary(Data0) -> 629 case unicode:characters_to_list(Data0,InEnc) of 630 {Tag,Decoded,Rest} when Decoded =/= [], Tag =:= error; Decoded =/= [], Tag =:= incomplete -> 631 {Decoded,erlang:iolist_to_binary(Rest)}; 632 {error, [], _} -> 633 exit(invalid_unicode); 634 {incomplete, [], R} -> 635 {[],R}; 636 List when is_list(List) -> 637 {List,<<>>} 638 end; 639 binary when is_binary(Data0) -> 640 case unicode:characters_to_binary(Data0,InEnc,OutEnc) of 641 {Tag2,Decoded2,Rest2} when Decoded2 =/= <<>>, Tag2 =:= error; Decoded2 =/= <<>>, Tag2 =:= incomplete -> 642 {Decoded2,erlang:iolist_to_binary(Rest2)}; 643 {error, <<>>, _} -> 644 exit(invalid_unicode); 645 {incomplete, <<>>, R} -> 646 {<<>>,R}; 647 Binary when is_binary(Binary) -> 648 {Binary,<<>>} 649 end; 650 _ -> %i.e. eof 651 {Data0,<<>>} 652 end, 653 case catch Mod:Func(S0, Data1, OutEnc, XtraArg) of 654 {stop,Result,Buf} -> 655 {reply,Result,State#state{buf = (if 656 is_binary(Buf) -> 657 list_to_binary([unicode:characters_to_binary(Buf,OutEnc,InEnc),NewBuff]); 658 is_list(Buf) -> 659 list_to_binary([unicode:characters_to_binary(Buf,unicode,InEnc),NewBuff]); 660 true -> 661 NewBuff 662 end)}}; 663 {'EXIT',Reason} -> 664 {stop,Reason,{error,err_func(Mod, Func, XtraArg)},State}; 665 S1 -> 666 get_chars_notempty(Mod, Func, XtraArg, S1, OutEnc, State#state{buf=NewBuff}) 667 end 668 catch 669 exit:ExReason -> 670 {stop,ExReason,invalid_unicode_error(Mod, Func, XtraArg, S0),State}; 671 error:ErrReason -> 672 {stop,ErrReason,{error,err_func(Mod, Func, XtraArg)},State} 673 end. 674 675%% A hack that tries to inform the caller about the position where the 676%% error occured. 677invalid_unicode_error(Mod, Func, XtraArg, S) -> 678 try 679 {erl_scan,tokens,_Args} = XtraArg, 680 Location = erl_scan:continuation_location(S), 681 {error,{Location, ?MODULE, invalid_unicode},Location} 682 catch 683 _:_ -> 684 {error,err_func(Mod, Func, XtraArg)} 685 end. 686 687%% Convert error code to make it look as before 688err_func(io_lib, get_until, {_,F,_}) -> 689 F. 690 691 692 693%% Process the I/O request setopts 694%% 695%% setopts 696setopts(Opts0,State) -> 697 Opts = proplists:unfold( 698 proplists:substitute_negations( 699 [{list,binary}], 700 expand_encoding(Opts0))), 701 case check_valid_opts(Opts) of 702 true -> 703 do_setopts(Opts,State); 704 false -> 705 {error,{error,enotsup},State} 706 end. 707check_valid_opts([]) -> 708 true; 709check_valid_opts([{binary,_}|T]) -> 710 check_valid_opts(T); 711check_valid_opts([{encoding,_Enc}|T]) -> 712 check_valid_opts(T); 713check_valid_opts(_) -> 714 false. 715do_setopts(Opts, State) -> 716 case valid_enc(proplists:get_value(encoding, Opts, State#state.unic)) of 717 {ok,NewUnic} -> 718 case proplists:get_value(binary, Opts) of 719 true -> 720 {reply,ok,State#state{read_mode=binary, unic=NewUnic}}; 721 false -> 722 {reply,ok,State#state{read_mode=list, unic=NewUnic}}; 723 undefined -> 724 {reply,ok,State#state{unic=NewUnic}} 725 end; 726 _ -> 727 {error,{error,badarg},State} 728 end. 729 730getopts(#state{read_mode=RM, unic=Unic} = State) -> 731 Bin = {binary, RM =:= binary}, 732 Uni = {encoding, Unic}, 733 {reply,[Bin,Uni],State}. 734 735%% Concatenate two binaries and convert the result to list or binary 736cat(B1, B2, binary, latin1, latin1) -> 737 list_to_binary([B1,B2]); 738cat(B1, B2, binary, InEncoding, OutEncoding) -> 739 case unicode:characters_to_binary([B1,B2],InEncoding,OutEncoding) of 740 Good when is_binary(Good) -> 741 Good; 742 _ -> 743 exit({no_translation,InEncoding,OutEncoding}) 744 end; 745%% Dialyzer finds this is never used... 746%% cat(B1, B2, list, InEncoding, OutEncoding) when InEncoding =/= latin1 -> 747%% % Catch i.e. unicode -> latin1 errors by using the outencoding although otherwise 748%% % irrelevant for lists... 749%% try 750%% unicode:characters_to_list(unicode:characters_to_binary([B1,B2],InEncoding,OutEncoding), 751%% OutEncoding) 752%% catch 753%% error:_ -> 754%% exit({no_translation,InEncoding,OutEncoding}) 755%% end. 756cat(B1, B2, list, latin1,_) -> 757 binary_to_list(B1)++binary_to_list(B2). 758 759%% Cast binary to list or binary 760cast(eof, _, _, _) -> 761 eof; 762cast(B, binary, latin1, latin1) -> 763 B; 764cast(B, binary, InEncoding, OutEncoding) -> 765 case unicode:characters_to_binary(B,InEncoding,OutEncoding) of 766 Good when is_binary(Good) -> 767 Good; 768 _ -> 769 exit({no_translation,InEncoding,OutEncoding}) 770 end; 771cast(B, list, latin1, _) -> 772 binary_to_list(B); 773cast(B, list, InEncoding, OutEncoding) -> 774 try 775 unicode:characters_to_list(unicode:characters_to_binary(B,InEncoding,OutEncoding), 776 OutEncoding) 777 catch 778 error:_ -> 779 exit({no_translation,InEncoding,OutEncoding}) 780 end. 781 782%% Convert buffer to binary 783cast_binary(Binary) when is_binary(Binary) -> 784 Binary; 785cast_binary([<<>>|List]) -> 786 cast_binary(List); 787cast_binary(List) when is_list(List) -> 788 list_to_binary(List); 789cast_binary(_EOF) -> 790 <<>>. 791 792%% Read size for different read modes 793read_size(binary) -> 794 ?READ_SIZE_BINARY; 795read_size(list) -> 796 ?READ_SIZE_LIST. 797 798%% Utf utility 799count_and_find(Bin,N,Encoding) -> 800 cafu(Bin,N,0,0,none,case Encoding of 801 unicode -> utf8; 802 Oth -> Oth 803 end). 804 805cafu(<<>>,0,Count,ByteCount,_SavePos,_) -> 806 {Count,ByteCount}; 807cafu(<<>>,_N,Count,_ByteCount,SavePos,_) -> 808 {Count,SavePos}; 809cafu(<<_/utf8,Rest/binary>>, 0, Count, ByteCount, _SavePos, utf8) -> 810 cafu(Rest,-1,Count+1,0,ByteCount,utf8); 811cafu(<<_/utf8,Rest/binary>>, N, Count, _ByteCount, SavePos, utf8) when N < 0 -> 812 cafu(Rest,-1,Count+1,0,SavePos,utf8); 813cafu(<<_/utf8,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, utf8) -> 814 Delta = byte_size(Whole) - byte_size(Rest), 815 cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,utf8); 816cafu(<<_/utf16-big,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf16,big}) -> 817 cafu(Rest,-1,Count+1,0,ByteCount,{utf16,big}); 818cafu(<<_/utf16-big,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf16,big}) when N < 0 -> 819 cafu(Rest,-1,Count+1,0,SavePos,{utf16,big}); 820cafu(<<_/utf16-big,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf16,big}) -> 821 Delta = byte_size(Whole) - byte_size(Rest), 822 cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf16,big}); 823cafu(<<_/utf16-little,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf16,little}) -> 824 cafu(Rest,-1,Count+1,0,ByteCount,{utf16,little}); 825cafu(<<_/utf16-little,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf16,little}) when N < 0 -> 826 cafu(Rest,-1,Count+1,0,SavePos,{utf16,little}); 827cafu(<<_/utf16-little,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf16,little}) -> 828 Delta = byte_size(Whole) - byte_size(Rest), 829 cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf16,little}); 830cafu(<<_/utf32-big,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf32,big}) -> 831 cafu(Rest,-1,Count+1,0,ByteCount,{utf32,big}); 832cafu(<<_/utf32-big,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf32,big}) when N < 0 -> 833 cafu(Rest,-1,Count+1,0,SavePos,{utf32,big}); 834cafu(<<_/utf32-big,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf32,big}) -> 835 Delta = byte_size(Whole) - byte_size(Rest), 836 cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf32,big}); 837cafu(<<_/utf32-little,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf32,little}) -> 838 cafu(Rest,-1,Count+1,0,ByteCount,{utf32,little}); 839cafu(<<_/utf32-little,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf32,little}) when N < 0 -> 840 cafu(Rest,-1,Count+1,0,SavePos,{utf32,little}); 841cafu(<<_/utf32-little,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf32,little}) -> 842 Delta = byte_size(Whole) - byte_size(Rest), 843 cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf32,little}); 844cafu(_Other,0,Count,ByteCount,_,_) -> % Non Unicode character, 845 % but found our point, OK this time 846 {Count,ByteCount}; 847cafu(Other,_N,Count,0,SavePos,Enc) -> % Not enough, but valid chomped unicode 848 % at end. 849 case cbv(Enc,Other) of 850 false -> 851 exit(invalid_unicode); 852 _ -> 853 {Count,SavePos} 854 end; 855cafu(Other,_N,Count,ByteCount,none,Enc) -> % Return what we'we got this far 856 % although not complete, 857 % it's not (yet) in error 858 case cbv(Enc,Other) of 859 false -> 860 exit(invalid_unicode); 861 _ -> 862 {Count,ByteCount} 863 end; 864cafu(Other,_N,Count,_ByteCount,SavePos,Enc) -> % As above but we have 865 % found a position 866 case cbv(Enc,Other) of 867 false -> 868 exit(invalid_unicode); 869 _ -> 870 {Count,SavePos} 871 end. 872 873%% 874%% Bluntly stolen from stdlib/unicode.erl (cbv means can be valid?) 875%% 876cbv(utf8,<<1:1,1:1,0:1,_:5>>) -> 877 1; 878cbv(utf8,<<1:1,1:1,1:1,0:1,_:4,R/binary>>) -> 879 case R of 880 <<>> -> 881 2; 882 <<1:1,0:1,_:6>> -> 883 1; 884 _ -> 885 false 886 end; 887cbv(utf8,<<1:1,1:1,1:1,1:1,0:1,_:3,R/binary>>) -> 888 case R of 889 <<>> -> 890 3; 891 <<1:1,0:1,_:6>> -> 892 2; 893 <<1:1,0:1,_:6,1:1,0:1,_:6>> -> 894 1; 895 _ -> 896 false 897 end; 898cbv(utf8,_) -> 899 false; 900 901cbv({utf16,big},<<A:8>>) when A =< 215; A >= 224 -> 902 1; 903cbv({utf16,big},<<54:6,_:2>>) -> 904 3; 905cbv({utf16,big},<<54:6,_:10>>) -> 906 2; 907cbv({utf16,big},<<54:6,_:10,55:6,_:2>>) -> 908 1; 909cbv({utf16,big},_) -> 910 false; 911cbv({utf16,little},<<_:8>>) -> 912 1; % or 3, we'll see 913cbv({utf16,little},<<_:8,54:6,_:2>>) -> 914 2; 915cbv({utf16,little},<<_:8,54:6,_:2,_:8>>) -> 916 1; 917cbv({utf16,little},_) -> 918 false; 919 920 921cbv({utf32,big}, <<0:8>>) -> 922 3; 923cbv({utf32,big}, <<0:8,X:8>>) when X =< 16 -> 924 2; 925cbv({utf32,big}, <<0:8,X:8,Y:8>>) 926 when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) -> 927 1; 928cbv({utf32,big},_) -> 929 false; 930cbv({utf32,little},<<_:8>>) -> 931 3; 932cbv({utf32,little},<<_:8,_:8>>) -> 933 2; 934cbv({utf32,little},<<X:8,255:8,0:8>>) when X =:= 254; X =:= 255 -> 935 false; 936cbv({utf32,little},<<_:8,Y:8,X:8>>) 937 when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) -> 938 1; 939cbv({utf32,little},_) -> 940 false. 941 942 943%%%----------------------------------------------------------------- 944%%% ?PRIM_FILE helpers 945 946%% Compensates ?PRIM_FILE:position/2 for the number of bytes 947%% we have buffered 948position(Handle, At, Buf) -> 949 SeekTo = 950 case At of 951 {cur, Offs} -> {cur, Offs-byte_size(Buf)}; 952 cur -> {cur, -byte_size(Buf)}; 953 _ -> At 954 end, 955 ?CALL_FD(Handle, position, [SeekTo]). 956