1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2000-2017. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20-module(file_io_server).
21
22%% A simple file server for io to one file instance per server instance.
23
24-export([format_error/1]).
25-export([start/3, start_link/3]).
26
27-export([count_and_find/3]).
28
29-record(state, {handle,owner,mref,buf,read_mode,unic}).
30
31-include("file_int.hrl").
32
33-define(READ_SIZE_LIST, 128).
34-define(READ_SIZE_BINARY, (8*1024)).
35
36-define(eat_message(M, T), receive M -> M after T -> timeout end).
37
38%%%-----------------------------------------------------------------
39%%% Exported functions
40
41format_error({_Line, ?MODULE, Reason}) ->
42    io_lib:format("~w", [Reason]);
43format_error({_Line, Mod, Reason}) ->
44    Mod:format_error(Reason);
45format_error(invalid_unicode) ->
46    io_lib:format("cannot translate from UTF-8", []);
47format_error(ErrorId) ->
48    erl_posix_msg:message(ErrorId).
49
50start(Owner, FileName, ModeList)
51  when is_pid(Owner), (is_list(FileName) orelse is_binary(FileName)), is_list(ModeList) ->
52    do_start(spawn, Owner, FileName, ModeList).
53
54start_link(Owner, FileName, ModeList)
55  when is_pid(Owner), (is_list(FileName) orelse is_binary(FileName)), is_list(ModeList) ->
56    do_start(spawn_link, Owner, FileName, ModeList).
57
58%%%-----------------------------------------------------------------
59%%% Server starter, dispatcher and helpers
60
61do_start(Spawn, Owner, FileName, ModeList) ->
62    Self = self(),
63    Ref = make_ref(),
64    Utag = erlang:dt_spread_tag(true),
65    Pid =
66	erlang:Spawn(
67	  fun() ->
68		  erlang:dt_restore_tag(Utag),
69		  %% process_flag(trap_exit, true),
70		  case parse_options(ModeList) of
71                      {ReadMode, UnicodeMode, Opts0} ->
72                          Opts = maybe_add_read_ahead(ReadMode, Opts0),
73			  case raw_file_io:open(FileName, [raw | Opts]) of
74			      {error, Reason} = Error ->
75				  Self ! {Ref, Error},
76				  exit(Reason);
77			      {ok, Handle} ->
78				  %% XXX must I handle R6 nodes here?
79				  M = erlang:monitor(process, Owner),
80				  Self ! {Ref, ok},
81				  server_loop(
82				    #state{handle    = Handle,
83					   owner     = Owner,
84					   mref      = M,
85					   buf       = <<>>,
86					   read_mode = ReadMode,
87					   unic = UnicodeMode})
88			  end;
89		      {error,Reason1} = Error1 ->
90			  Self ! {Ref, Error1},
91			  exit(Reason1)
92		  end
93	  end),
94    erlang:dt_restore_tag(Utag),
95    Mref = erlang:monitor(process, Pid),
96    receive
97	{Ref, {error, _Reason} = Error} ->
98	    erlang:demonitor(Mref, [flush]),
99	    Error;
100	{Ref, ok} ->
101	    erlang:demonitor(Mref),
102	    receive
103		{'DOWN', Mref, _, _, Reason} ->
104		    {error, Reason}
105	    after 0 ->
106		    {ok, Pid}
107	    end;
108	{'DOWN', Mref, _, _, Reason} ->
109	    {error, Reason}
110    end.
111
112%%% Returns {ReadMode, UnicodeMode, RealOpts}
113parse_options(List) ->
114    parse_options(expand_encoding(List), list, latin1, []).
115
116parse_options([], list, Uni, Acc) ->
117    {list,Uni,[binary|lists:reverse(Acc)]};
118parse_options([], binary, Uni, Acc) ->
119    {binary,Uni,lists:reverse(Acc)};
120parse_options([{encoding, Encoding}|T], RMode, _, Acc) ->
121    case valid_enc(Encoding) of
122	{ok, ExpandedEnc} ->
123	    parse_options(T, RMode, ExpandedEnc, Acc);
124	{error,_Reason} = Error ->
125	    Error
126    end;
127parse_options([binary|T], _, Uni, Acc) ->
128    parse_options(T, binary, Uni, [binary|Acc]);
129parse_options([H|T], R, U, Acc) ->
130    parse_options(T, R, U, [H|Acc]).
131
132expand_encoding([]) ->
133    [];
134expand_encoding([latin1 | T]) ->
135    [{encoding,latin1} | expand_encoding(T)];
136expand_encoding([unicode | T]) ->
137    [{encoding,unicode} | expand_encoding(T)];
138expand_encoding([H|T]) ->
139    [H|expand_encoding(T)].
140
141valid_enc(latin1) ->
142    {ok,latin1};
143valid_enc(utf8) ->
144    {ok,unicode};
145valid_enc(unicode) ->
146    {ok,unicode};
147valid_enc(utf16) ->
148    {ok,{utf16,big}};
149valid_enc({utf16,big}) ->
150    {ok,{utf16,big}};
151valid_enc({utf16,little}) ->
152    {ok,{utf16,little}};
153valid_enc(utf32) ->
154    {ok,{utf32,big}};
155valid_enc({utf32,big}) ->
156    {ok,{utf32,big}};
157valid_enc({utf32,little}) ->
158    {ok,{utf32,little}};
159valid_enc(_Other) ->
160    {error,badarg}.
161
162%% Add a small read_ahead buffer if the file is opened for reading
163%% only in list mode and no read_ahead is already given.
164maybe_add_read_ahead(binary, Opts) ->
165    Opts;
166maybe_add_read_ahead(list, Opts) ->
167    P = fun(read_ahead) -> true;
168           ({read_ahead,_}) -> true;
169           (append) -> true;
170           (exclusive) -> true;
171           (write) -> true;
172           (_) -> false
173        end,
174    case lists:any(P, Opts) of
175        false ->
176            [{read_ahead, 4096}|Opts];
177        true ->
178            Opts
179    end.
180
181server_loop(#state{mref = Mref} = State) ->
182    receive
183	{file_request, From, ReplyAs, Request} when is_pid(From) ->
184	    case file_request(Request, State) of
185		{reply, Reply, NewState} ->
186		    _ = file_reply(From, ReplyAs, Reply),
187		    server_loop(NewState);
188		{error, Reply, NewState} ->
189		    %% error is the same as reply, except that
190		    %% it breaks the io_request_loop further down
191		    _ = file_reply(From, ReplyAs, Reply),
192		    server_loop(NewState);
193		{stop, Reason, Reply, _NewState} ->
194		    _ = file_reply(From, ReplyAs, Reply),
195		    exit(Reason)
196	    end;
197	{io_request, From, ReplyAs, Request} when is_pid(From) ->
198	    case io_request(Request, State) of
199		{reply, Reply, NewState} ->
200		    _ = io_reply(From, ReplyAs, Reply),
201		    server_loop(NewState);
202		{error, Reply, NewState} ->
203		    %% error is the same as reply, except that
204		    %% it breaks the io_request_loop further down
205		    _ = io_reply(From, ReplyAs, Reply),
206		    server_loop(NewState);
207		{stop, Reason, Reply, _NewState} ->
208		    _ = io_reply(From, ReplyAs, Reply),
209		    exit(Reason)
210	    end;
211	{'DOWN', Mref, _, _, Reason} ->
212	    exit(Reason);
213	_ ->
214	    server_loop(State)
215    end.
216
217file_reply(From, ReplyAs, Reply) ->
218    From ! {file_reply, ReplyAs, Reply}.
219
220io_reply(From, ReplyAs, Reply) ->
221    From ! {io_reply, ReplyAs, Reply}.
222
223%%%-----------------------------------------------------------------
224%%% file requests
225
226file_request({advise,Offset,Length,Advise},
227         #state{handle=Handle}=State) ->
228    case ?CALL_FD(Handle, advise, [Offset, Length, Advise]) of
229    {error,Reason}=Reply ->
230        {stop,Reason,Reply,State};
231    Reply ->
232        {reply,Reply,State}
233    end;
234file_request({allocate, Offset, Length},
235         #state{handle = Handle} = State) ->
236    Reply = ?CALL_FD(Handle, allocate, [Offset, Length]),
237    {reply, Reply, State};
238file_request({pread,At,Sz}, State)
239  when At =:= cur;
240       At =:= {cur,0} ->
241    case get_chars(Sz, latin1, State) of
242	{reply,Reply,NewState}
243	  when is_list(Reply);
244	       is_binary(Reply) ->
245	    {reply,{ok,Reply},NewState};
246	Other ->
247	    Other
248    end;
249file_request({pread,At,Sz},
250	     #state{handle=Handle,buf=Buf}=State) ->
251    case position(Handle, At, Buf) of
252	{error,_} = Reply ->
253	    {error,Reply,State};
254	_ ->
255	    case get_chars(Sz, latin1, State#state{buf= <<>>}) of
256		{reply,Reply,NewState}
257		  when is_list(Reply);
258		       is_binary(Reply) ->
259		    {reply,{ok,Reply},NewState};
260		Other ->
261		    Other
262	    end
263    end;
264file_request({pwrite,At,Data},
265	     #state{buf= <<>>}=State)
266  when At =:= cur;
267       At =:= {cur,0} ->
268    put_chars(Data, latin1, State);
269file_request({pwrite,At,Data},
270	     #state{handle=Handle,buf=Buf}=State) ->
271    case position(Handle, At, Buf) of
272	{error,_} = Reply ->
273	    {error,Reply,State};
274	_ ->
275	    put_chars(Data, latin1, State)
276    end;
277file_request(datasync,
278	     #state{handle=Handle}=State) ->
279    case ?CALL_FD(Handle, datasync, []) of
280	{error,Reason}=Reply ->
281	    {stop,Reason,Reply,State};
282	Reply ->
283	    {reply,Reply,State}
284    end;
285file_request(sync,
286	     #state{handle=Handle}=State) ->
287    case ?CALL_FD(Handle, sync, []) of
288	{error,Reason}=Reply ->
289	    {stop,Reason,Reply,State};
290	Reply ->
291	    {reply,Reply,State}
292    end;
293file_request(close,
294	     #state{handle=Handle}=State) ->
295    case ?CALL_FD(Handle, close, []) of
296	{error,Reason}=Reply ->
297	    {stop,Reason,Reply,State#state{buf= <<>>}};
298	Reply ->
299	    {stop,normal,Reply,State#state{buf= <<>>}}
300    end;
301file_request({position,At},
302	     #state{handle=Handle,buf=Buf}=State) ->
303    case position(Handle, At, Buf) of
304	{error,_} = Reply ->
305	    {error,Reply,State};
306	Reply ->
307	    std_reply(Reply, State)
308    end;
309file_request(truncate,
310	     #state{handle=Handle}=State) ->
311    case ?CALL_FD(Handle, truncate, []) of
312	{error,Reason}=Reply ->
313	    {stop,Reason,Reply,State#state{buf= <<>>}};
314	Reply ->
315	    std_reply(Reply, State)
316    end;
317file_request({read_handle_info, Opts},
318             #state{handle=Handle}=State) ->
319    case ?CALL_FD(Handle, read_handle_info, [Opts]) of
320        {error,Reason}=Reply ->
321            {stop,Reason,Reply,State};
322        Reply ->
323            {reply,Reply,State}
324    end;
325file_request(Unknown,
326	     #state{}=State) ->
327    Reason = {request, Unknown},
328    {error,{error,Reason},State}.
329
330%% Standard reply and clear buffer
331std_reply({error,_}=Reply, State) ->
332    {error,Reply,State#state{buf= <<>>}};
333std_reply(Reply, State) ->
334    {reply,Reply,State#state{buf= <<>>}}.
335
336%%%-----------------------------------------------------------------
337%%% I/O request
338
339%% New protocol with encoding tags (R13)
340io_request({put_chars, Enc, Chars},
341	   #state{buf= <<>>}=State) ->
342    put_chars(Chars, Enc, State);
343io_request({put_chars, Enc, Chars},
344	   #state{handle=Handle,buf=Buf}=State) ->
345    case position(Handle, cur, Buf) of
346	{error,Reason}=Reply ->
347	    {stop,Reason,Reply,State};
348	_ ->
349	    put_chars(Chars, Enc, State#state{buf= <<>>})
350    end;
351io_request({put_chars,Enc,Mod,Func,Args},
352	   #state{}=State) ->
353    case catch apply(Mod, Func, Args) of
354	Chars when is_list(Chars); is_binary(Chars) ->
355	    io_request({put_chars,Enc,Chars}, State);
356	_ ->
357	    {error,{error,Func},State}
358    end;
359
360
361io_request({get_until,Enc,_Prompt,Mod,Func,XtraArgs},
362	   #state{}=State) ->
363    get_chars(io_lib, get_until, {Mod, Func, XtraArgs}, Enc, State);
364io_request({get_chars,Enc,_Prompt,N},
365	   #state{}=State) ->
366    get_chars(N, Enc, State);
367
368io_request({get_line,OutEnc,_Prompt}, #state{buf=Buf, read_mode=Mode, unic=InEnc} = State0) ->
369    try
370	%% Minimize the encoding conversions
371	WorkEnc = case InEnc of
372		      {_,_} -> OutEnc; %% utf16 or utf32
373		      _ -> InEnc %% Byte oriented utf8 or latin1
374		  end,
375	{Res, State} = get_line(start, convert_enc(Buf, InEnc, WorkEnc), WorkEnc, State0),
376	{reply, cast(Res, Mode, WorkEnc, OutEnc), State}
377    catch exit:ExError ->
378	    {stop,ExError,{error,ExError},State0#state{buf= <<>>}}
379    end;
380
381io_request({setopts, Opts},
382	   #state{}=State) when is_list(Opts) ->
383    setopts(Opts, State);
384
385io_request(getopts,
386	   #state{}=State) ->
387    getopts(State);
388
389%% BC with pre-R13 nodes
390io_request({put_chars, Chars},#state{}=State) ->
391    io_request({put_chars, latin1, Chars},State);
392io_request({put_chars,Mod,Func,Args}, #state{}=State) ->
393    io_request({put_chars,latin1,Mod,Func,Args}, State);
394io_request({get_until,_Prompt,Mod,Func,XtraArgs}, #state{}=State) ->
395    io_request({get_until,latin1,_Prompt,Mod,Func,XtraArgs}, State);
396io_request({get_chars,_Prompt,N}, #state{}=State) ->
397    io_request({get_chars,latin1,_Prompt,N}, State);
398io_request({get_line,_Prompt}, #state{}=State) ->
399    io_request({get_line,latin1,_Prompt}, State);
400
401io_request({requests,Requests},
402	   #state{}=State) when is_list(Requests) ->
403    io_request_loop(Requests, {reply,ok,State});
404io_request(Unknown,
405	   #state{}=State) ->
406    Reason = {request,Unknown},
407    {error,{error,Reason},State}.
408
409
410%% Process a list of requests as long as the results are ok.
411
412io_request_loop([], Result) ->
413    Result;
414io_request_loop([_Request|_Tail],
415		{stop,_Reason,_Reply,_State}=Result) ->
416    Result;
417io_request_loop([_Request|_Tail],
418		{error,_Reply,_State}=Result) ->
419    Result;
420io_request_loop([Request|Tail],
421		{reply,_Reply,State}) ->
422    io_request_loop(Tail, io_request(Request, State)).
423
424
425%% I/O request put_chars
426%%
427put_chars(Chars, latin1, #state{handle=Handle, unic=latin1}=State) ->
428    NewState = State#state{buf = <<>>},
429    case ?CALL_FD(Handle, write, [Chars]) of
430	{error,Reason}=Reply ->
431	    {stop,Reason,Reply,NewState};
432	Reply ->
433	    {reply,Reply,NewState}
434    end;
435put_chars(Chars, InEncoding, #state{handle=Handle, unic=OutEncoding}=State) ->
436    NewState = State#state{buf = <<>>},
437    case unicode:characters_to_binary(Chars,InEncoding,OutEncoding) of
438	Bin when is_binary(Bin) ->
439	    case ?CALL_FD(Handle, write, [Bin]) of
440		{error,Reason}=Reply ->
441		    {stop,Reason,Reply,NewState};
442		Reply ->
443		    {reply,Reply,NewState}
444	    end;
445	{error,_,_} ->
446	    {stop,no_translation,
447	     {error,{no_translation, InEncoding, OutEncoding}},
448	     NewState}
449    end.
450
451get_line(S, {<<>>, Cont}, OutEnc,
452	 #state{handle=Handle, read_mode=Mode, unic=InEnc}=State) ->
453    case ?CALL_FD(Handle, read, [read_size(Mode)]) of
454	{ok,Bin} ->
455	    get_line(S, convert_enc([Cont, Bin], InEnc, OutEnc), OutEnc, State);
456	eof ->
457	    get_line(S, {eof, Cont}, OutEnc, State);
458	{error,Reason}=Error ->
459	    {stop,Reason,Error,State}
460    end;
461get_line(S0, {Buf, BCont}, OutEnc, #state{unic=InEnc}=State) ->
462    case io_lib:collect_line(S0, Buf, OutEnc, []) of
463	{stop, Result, Cont0} ->
464	    %% Convert both buffers back to file InEnc encoding
465	    {Cont, <<>>} = convert_enc(Cont0, OutEnc, InEnc),
466	    {Result, State#state{buf=cast_binary([Cont, BCont])}};
467	S ->
468	    get_line(S, {<<>>, BCont}, OutEnc, State)
469    end.
470
471convert_enc(Bins, Enc, Enc) ->
472    {cast_binary(Bins), <<>>};
473convert_enc(eof, _, _) ->
474    {<<>>, <<>>};
475convert_enc(Bin, InEnc, OutEnc) ->
476    case unicode:characters_to_binary(Bin, InEnc, OutEnc) of
477	Res when is_binary(Res) ->
478	    {Res, <<>>};
479	{incomplete, Res, Cont} ->
480	    {Res, Cont};
481	{error, _, _} ->
482	    exit({no_translation, InEnc, OutEnc})
483    end.
484
485%%
486%% Process the I/O request get_chars
487%%
488get_chars(0, Enc, #state{read_mode=ReadMode,unic=InEncoding}=State) ->
489    {reply,cast(<<>>, ReadMode,InEncoding, Enc),State};
490get_chars(N, Enc, #state{buf=Buf,read_mode=ReadMode,unic=latin1}=State)
491  when is_integer(N), N > 0, N =< byte_size(Buf) ->
492    {B1,B2} = split_binary(Buf, N),
493    {reply,cast(B1, ReadMode,latin1,Enc),State#state{buf=B2}};
494get_chars(N, Enc, #state{buf=Buf,read_mode=ReadMode,unic=latin1}=State)
495  when is_integer(N), N > 0, N =< byte_size(Buf) ->
496    {B1,B2} = split_binary(Buf, N),
497    {reply,cast(B1, ReadMode,latin1,Enc),State#state{buf=B2}};
498get_chars(N, OutEnc,#state{handle=Handle,buf=Buf,read_mode=ReadMode,unic=latin1}=State)
499  when is_integer(N), N > 0 ->
500    BufSize = byte_size(Buf),
501    NeedSize = N-BufSize,
502    Size = erlang:max(NeedSize, ?READ_SIZE_BINARY),
503    case ?CALL_FD(Handle, read, [Size]) of
504	{ok, B} ->
505	    if BufSize+byte_size(B) < N ->
506		    std_reply(cat(Buf, B, ReadMode,latin1,OutEnc), State);
507	       true ->
508		    {B1,B2} = split_binary(B, NeedSize),
509		    {reply,cat(Buf, B1, ReadMode, latin1,OutEnc),State#state{buf=B2}}
510	    end;
511	eof when BufSize =:= 0 ->
512	    {reply,eof,State};
513	eof ->
514	    std_reply(cast(Buf, ReadMode,latin1,OutEnc), State);
515	{error,Reason}=Error ->
516	    {stop,Reason,Error,State#state{buf= <<>>}}
517    end;
518get_chars(N, OutEnc,#state{handle=Handle,buf=Buf,read_mode=ReadMode,unic=InEncoding}=State)
519  when is_integer(N), N > 0 ->
520    try
521	%% This is rather tricky, we need to count the actual number of characters
522	%% in the buffer first as unicode characters are not constant in length
523	{BufCount, SplitPos} = count_and_find(Buf,N,InEncoding),
524	case BufCount >= N of
525	    true ->
526		{B1,B2} = case SplitPos of
527			      none -> {Buf,<<>>};
528			      _ ->split_binary(Buf,SplitPos)
529			  end,
530		{reply,cast(B1, ReadMode,InEncoding,OutEnc),State#state{buf=B2}};
531	    false ->
532		%% Need more, Try to read 4*needed in bytes...
533		NeedSize = (N - BufCount) * 4,
534		Size = erlang:max(NeedSize, ?READ_SIZE_BINARY),
535		case ?CALL_FD(Handle, read, [Size]) of
536		    {ok, B} ->
537			NewBuf = list_to_binary([Buf,B]),
538			{NewCount,NewSplit} = count_and_find(NewBuf,N,InEncoding),
539			case NewCount >= N of
540			    true ->
541				{B01,B02} = case NewSplit of
542						none -> {NewBuf,<<>>};
543						_ ->split_binary(NewBuf, NewSplit)
544					    end,
545				{reply,cast(B01, ReadMode,InEncoding,OutEnc),
546				 State#state{buf=B02}};
547			    false ->
548				%% Reached end of file
549				std_reply(cast(NewBuf, ReadMode,InEncoding,OutEnc),
550					  State#state{buf = <<>>})
551			end;
552		    eof when BufCount =:= 0 ->
553			{reply,eof,State};
554		    eof ->
555			std_reply(cast(Buf, ReadMode,InEncoding,OutEnc), State#state{buf = <<>>});
556		    {error,Reason}=Error ->
557			{stop,Reason,Error,State#state{buf = <<>>}}
558		end
559	end
560    catch
561	exit:ExError ->
562	    {stop,ExError,{error,ExError},State#state{buf= <<>>}}
563    end;
564
565get_chars(_N, _, #state{}=State) ->
566    {error,{error,get_chars},State}.
567
568get_chars(Mod, Func, XtraArg, OutEnc, #state{buf= <<>>}=State) ->
569    get_chars_empty(Mod, Func, XtraArg, start, OutEnc, State);
570get_chars(Mod, Func, XtraArg, OutEnc, #state{buf=Buf}=State) ->
571    get_chars_apply(Mod, Func, XtraArg, start, OutEnc, State#state{buf= <<>>}, Buf).
572
573get_chars_empty(Mod, Func, XtraArg, S, latin1,
574		#state{handle=Handle,read_mode=ReadMode, unic=latin1}=State) ->
575    case ?CALL_FD(Handle, read, [read_size(ReadMode)]) of
576	{ok,Bin} ->
577	    get_chars_apply(Mod, Func, XtraArg, S, latin1, State, Bin);
578	eof ->
579	    get_chars_apply(Mod, Func, XtraArg, S, latin1, State, eof);
580	{error,Reason}=Error ->
581	    {stop,Reason,Error,State}
582    end;
583get_chars_empty(Mod, Func, XtraArg, S, OutEnc,
584		#state{handle=Handle,read_mode=ReadMode}=State) ->
585    case ?CALL_FD(Handle, read, [read_size(ReadMode)]) of
586	{ok,Bin} ->
587	    get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, Bin);
588	eof ->
589	    get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, eof);
590	{error,Reason}=Error ->
591	    {stop,Reason,Error,State}
592    end.
593get_chars_notempty(Mod, Func, XtraArg, S, OutEnc,
594		   #state{handle=Handle,read_mode=ReadMode,buf = B}=State) ->
595    case ?CALL_FD(Handle, read, [read_size(ReadMode)]) of
596	{ok,Bin} ->
597	    get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, list_to_binary([B,Bin]));
598	eof ->
599	    case B of
600		<<>> ->
601		    get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, eof);
602		_ ->
603                    {stop,invalid_unicode,invalid_unicode_error(Mod, Func, XtraArg, S),State}
604	    end;
605	{error,Reason}=Error ->
606	    {stop,Reason,Error,State}
607    end.
608
609
610get_chars_apply(Mod, Func, XtraArg, S0, latin1,
611		#state{read_mode=ReadMode,unic=latin1}=State, Data0) ->
612    Data1 = case ReadMode of
613	       list when is_binary(Data0) -> binary_to_list(Data0);
614	       _ -> Data0
615	    end,
616    case catch Mod:Func(S0, Data1, latin1, XtraArg) of
617	{stop,Result,Buf} ->
618	    {reply,Result,State#state{buf=cast_binary(Buf)}};
619	{'EXIT',Reason} ->
620	    {stop,Reason,{error,err_func(Mod, Func, XtraArg)},State};
621	S1 ->
622	    get_chars_empty(Mod, Func, XtraArg, S1, latin1, State)
623    end;
624get_chars_apply(Mod, Func, XtraArg, S0, OutEnc,
625		#state{read_mode=ReadMode,unic=InEnc}=State, Data0) ->
626    try
627	{Data1,NewBuff} = case ReadMode of
628			      list when is_binary(Data0) ->
629				  case unicode:characters_to_list(Data0,InEnc) of
630				      {Tag,Decoded,Rest} when Decoded =/= [], Tag =:= error; Decoded =/= [], Tag =:= incomplete ->
631					  {Decoded,erlang:iolist_to_binary(Rest)};
632				      {error, [], _}  ->
633					  exit(invalid_unicode);
634				      {incomplete, [], R}  ->
635					  {[],R};
636				      List when is_list(List) ->
637					  {List,<<>>}
638				  end;
639			      binary when is_binary(Data0) ->
640				  case unicode:characters_to_binary(Data0,InEnc,OutEnc) of
641				      {Tag2,Decoded2,Rest2} when Decoded2 =/= <<>>, Tag2 =:= error; Decoded2 =/= <<>>, Tag2 =:= incomplete ->
642					  {Decoded2,erlang:iolist_to_binary(Rest2)};
643				      {error, <<>>, _} ->
644					  exit(invalid_unicode);
645				      {incomplete, <<>>, R} ->
646					  {<<>>,R};
647				      Binary when is_binary(Binary) ->
648					  {Binary,<<>>}
649				  end;
650			      _ -> %i.e. eof
651				  {Data0,<<>>}
652			  end,
653	case catch Mod:Func(S0, Data1, OutEnc, XtraArg) of
654	    {stop,Result,Buf} ->
655		{reply,Result,State#state{buf = (if
656						     is_binary(Buf) ->
657							 list_to_binary([unicode:characters_to_binary(Buf,OutEnc,InEnc),NewBuff]);
658						     is_list(Buf) ->
659							 list_to_binary([unicode:characters_to_binary(Buf,unicode,InEnc),NewBuff]);
660						     true ->
661							 NewBuff
662						end)}};
663	    {'EXIT',Reason} ->
664		{stop,Reason,{error,err_func(Mod, Func, XtraArg)},State};
665	    S1 ->
666		get_chars_notempty(Mod, Func, XtraArg, S1, OutEnc, State#state{buf=NewBuff})
667	end
668    catch
669	exit:ExReason ->
670            {stop,ExReason,invalid_unicode_error(Mod, Func, XtraArg, S0),State};
671	error:ErrReason ->
672	   {stop,ErrReason,{error,err_func(Mod, Func, XtraArg)},State}
673    end.
674
675%% A hack that tries to inform the caller about the position where the
676%% error occured.
677invalid_unicode_error(Mod, Func, XtraArg, S) ->
678    try
679        {erl_scan,tokens,_Args} = XtraArg,
680        Location = erl_scan:continuation_location(S),
681        {error,{Location, ?MODULE, invalid_unicode},Location}
682    catch
683        _:_ ->
684            {error,err_func(Mod, Func, XtraArg)}
685    end.
686
687%% Convert error code to make it look as before
688err_func(io_lib, get_until, {_,F,_}) ->
689    F.
690
691
692
693%% Process the I/O request setopts
694%%
695%% setopts
696setopts(Opts0,State) ->
697    Opts = proplists:unfold(
698	     proplists:substitute_negations(
699	       [{list,binary}],
700	       expand_encoding(Opts0))),
701    case check_valid_opts(Opts) of
702	true ->
703	    do_setopts(Opts,State);
704	false ->
705	    {error,{error,enotsup},State}
706    end.
707check_valid_opts([]) ->
708    true;
709check_valid_opts([{binary,_}|T]) ->
710    check_valid_opts(T);
711check_valid_opts([{encoding,_Enc}|T]) ->
712    check_valid_opts(T);
713check_valid_opts(_) ->
714    false.
715do_setopts(Opts, State) ->
716    case valid_enc(proplists:get_value(encoding, Opts, State#state.unic)) of
717	{ok,NewUnic} ->
718	    case proplists:get_value(binary, Opts) of
719		true ->
720		    {reply,ok,State#state{read_mode=binary, unic=NewUnic}};
721		false ->
722		    {reply,ok,State#state{read_mode=list, unic=NewUnic}};
723		undefined ->
724		    {reply,ok,State#state{unic=NewUnic}}
725	    end;
726	_ ->
727	    {error,{error,badarg},State}
728    end.
729
730getopts(#state{read_mode=RM, unic=Unic} = State) ->
731    Bin = {binary, RM =:= binary},
732    Uni = {encoding, Unic},
733    {reply,[Bin,Uni],State}.
734
735%% Concatenate two binaries and convert the result to list or binary
736cat(B1, B2, binary, latin1, latin1) ->
737    list_to_binary([B1,B2]);
738cat(B1, B2, binary, InEncoding, OutEncoding) ->
739    case unicode:characters_to_binary([B1,B2],InEncoding,OutEncoding) of
740	Good when is_binary(Good) ->
741	    Good;
742	_ ->
743	    exit({no_translation,InEncoding,OutEncoding})
744    end;
745%% Dialyzer finds this is never used...
746%% cat(B1, B2, list, InEncoding, OutEncoding) when InEncoding =/= latin1 ->
747%%     % Catch i.e. unicode -> latin1 errors by using the outencoding although otherwise
748%%     % irrelevant for lists...
749%%     try
750%% 	unicode:characters_to_list(unicode:characters_to_binary([B1,B2],InEncoding,OutEncoding),
751%% 				   OutEncoding)
752%%     catch
753%% 	error:_ ->
754%% 	    exit({no_translation,InEncoding,OutEncoding})
755%%     end.
756cat(B1, B2, list, latin1,_) ->
757    binary_to_list(B1)++binary_to_list(B2).
758
759%% Cast binary to list or binary
760cast(eof, _, _, _) ->
761    eof;
762cast(B, binary, latin1, latin1) ->
763    B;
764cast(B, binary, InEncoding, OutEncoding) ->
765    case unicode:characters_to_binary(B,InEncoding,OutEncoding) of
766	Good when is_binary(Good) ->
767	    Good;
768	_ ->
769	    exit({no_translation,InEncoding,OutEncoding})
770    end;
771cast(B, list, latin1, _) ->
772    binary_to_list(B);
773cast(B, list, InEncoding, OutEncoding) ->
774    try
775	unicode:characters_to_list(unicode:characters_to_binary(B,InEncoding,OutEncoding),
776				   OutEncoding)
777    catch
778	error:_ ->
779	    exit({no_translation,InEncoding,OutEncoding})
780    end.
781
782%% Convert buffer to binary
783cast_binary(Binary) when is_binary(Binary) ->
784    Binary;
785cast_binary([<<>>|List]) ->
786    cast_binary(List);
787cast_binary(List) when is_list(List) ->
788    list_to_binary(List);
789cast_binary(_EOF) ->
790    <<>>.
791
792%% Read size for different read modes
793read_size(binary) ->
794    ?READ_SIZE_BINARY;
795read_size(list) ->
796    ?READ_SIZE_LIST.
797
798%% Utf utility
799count_and_find(Bin,N,Encoding) ->
800    cafu(Bin,N,0,0,none,case Encoding of
801			   unicode -> utf8;
802			   Oth -> Oth
803			end).
804
805cafu(<<>>,0,Count,ByteCount,_SavePos,_) ->
806    {Count,ByteCount};
807cafu(<<>>,_N,Count,_ByteCount,SavePos,_) ->
808    {Count,SavePos};
809cafu(<<_/utf8,Rest/binary>>, 0, Count, ByteCount, _SavePos, utf8) ->
810    cafu(Rest,-1,Count+1,0,ByteCount,utf8);
811cafu(<<_/utf8,Rest/binary>>, N, Count, _ByteCount, SavePos, utf8) when N < 0 ->
812    cafu(Rest,-1,Count+1,0,SavePos,utf8);
813cafu(<<_/utf8,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, utf8) ->
814    Delta = byte_size(Whole) - byte_size(Rest),
815    cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,utf8);
816cafu(<<_/utf16-big,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf16,big}) ->
817    cafu(Rest,-1,Count+1,0,ByteCount,{utf16,big});
818cafu(<<_/utf16-big,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf16,big}) when N < 0 ->
819    cafu(Rest,-1,Count+1,0,SavePos,{utf16,big});
820cafu(<<_/utf16-big,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf16,big}) ->
821    Delta = byte_size(Whole) - byte_size(Rest),
822    cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf16,big});
823cafu(<<_/utf16-little,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf16,little}) ->
824    cafu(Rest,-1,Count+1,0,ByteCount,{utf16,little});
825cafu(<<_/utf16-little,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf16,little}) when N < 0 ->
826    cafu(Rest,-1,Count+1,0,SavePos,{utf16,little});
827cafu(<<_/utf16-little,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf16,little}) ->
828    Delta = byte_size(Whole) - byte_size(Rest),
829    cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf16,little});
830cafu(<<_/utf32-big,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf32,big}) ->
831    cafu(Rest,-1,Count+1,0,ByteCount,{utf32,big});
832cafu(<<_/utf32-big,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf32,big}) when N < 0 ->
833    cafu(Rest,-1,Count+1,0,SavePos,{utf32,big});
834cafu(<<_/utf32-big,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf32,big}) ->
835    Delta = byte_size(Whole) - byte_size(Rest),
836    cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf32,big});
837cafu(<<_/utf32-little,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf32,little}) ->
838    cafu(Rest,-1,Count+1,0,ByteCount,{utf32,little});
839cafu(<<_/utf32-little,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf32,little}) when N < 0 ->
840    cafu(Rest,-1,Count+1,0,SavePos,{utf32,little});
841cafu(<<_/utf32-little,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf32,little}) ->
842    Delta = byte_size(Whole) - byte_size(Rest),
843    cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf32,little});
844cafu(_Other,0,Count,ByteCount,_,_) -> % Non Unicode character,
845                                     % but found our point, OK this time
846    {Count,ByteCount};
847cafu(Other,_N,Count,0,SavePos,Enc) -> % Not enough, but valid chomped unicode
848                                       % at end.
849    case cbv(Enc,Other) of
850	false ->
851	    exit(invalid_unicode);
852	_ ->
853	    {Count,SavePos}
854    end;
855cafu(Other,_N,Count,ByteCount,none,Enc) -> % Return what we'we got this far
856					   % although not complete,
857					   % it's not (yet) in error
858    case cbv(Enc,Other) of
859	false ->
860	    exit(invalid_unicode);
861	_ ->
862	    {Count,ByteCount}
863    end;
864cafu(Other,_N,Count,_ByteCount,SavePos,Enc) -> % As above but we have
865					       % found a position
866    case cbv(Enc,Other) of
867	false ->
868	    exit(invalid_unicode);
869	_ ->
870	    {Count,SavePos}
871    end.
872
873%%
874%% Bluntly stolen from stdlib/unicode.erl (cbv means can be valid?)
875%%
876cbv(utf8,<<1:1,1:1,0:1,_:5>>) ->
877    1;
878cbv(utf8,<<1:1,1:1,1:1,0:1,_:4,R/binary>>) ->
879    case R of
880	<<>> ->
881	    2;
882	<<1:1,0:1,_:6>> ->
883	    1;
884	_ ->
885	    false
886    end;
887cbv(utf8,<<1:1,1:1,1:1,1:1,0:1,_:3,R/binary>>) ->
888    case R of
889	<<>> ->
890	    3;
891	<<1:1,0:1,_:6>> ->
892	    2;
893	<<1:1,0:1,_:6,1:1,0:1,_:6>> ->
894	    1;
895	_ ->
896	    false
897    end;
898cbv(utf8,_) ->
899    false;
900
901cbv({utf16,big},<<A:8>>) when A =< 215; A >= 224 ->
902    1;
903cbv({utf16,big},<<54:6,_:2>>) ->
904    3;
905cbv({utf16,big},<<54:6,_:10>>) ->
906    2;
907cbv({utf16,big},<<54:6,_:10,55:6,_:2>>) ->
908    1;
909cbv({utf16,big},_) ->
910    false;
911cbv({utf16,little},<<_:8>>) ->
912    1; % or 3, we'll see
913cbv({utf16,little},<<_:8,54:6,_:2>>) ->
914    2;
915cbv({utf16,little},<<_:8,54:6,_:2,_:8>>) ->
916    1;
917cbv({utf16,little},_) ->
918    false;
919
920
921cbv({utf32,big}, <<0:8>>) ->
922    3;
923cbv({utf32,big}, <<0:8,X:8>>) when X =< 16 ->
924    2;
925cbv({utf32,big}, <<0:8,X:8,Y:8>>)
926  when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) ->
927    1;
928cbv({utf32,big},_) ->
929    false;
930cbv({utf32,little},<<_:8>>) ->
931    3;
932cbv({utf32,little},<<_:8,_:8>>) ->
933    2;
934cbv({utf32,little},<<X:8,255:8,0:8>>) when X =:= 254; X =:= 255 ->
935    false;
936cbv({utf32,little},<<_:8,Y:8,X:8>>)
937  when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) ->
938    1;
939cbv({utf32,little},_) ->
940    false.
941
942
943%%%-----------------------------------------------------------------
944%%% ?PRIM_FILE helpers
945
946%% Compensates ?PRIM_FILE:position/2 for the number of bytes
947%% we have buffered
948position(Handle, At, Buf) ->
949    SeekTo =
950        case At of
951            {cur, Offs} -> {cur, Offs-byte_size(Buf)};
952            cur -> {cur, -byte_size(Buf)};
953            _ -> At
954        end,
955    ?CALL_FD(Handle, position, [SeekTo]).
956