1%% ``Licensed under the Apache License, Version 2.0 (the "License");
2%% you may not use this file except in compliance with the License.
3%% You may obtain a copy of the License at
4%%
5%%     http://www.apache.org/licenses/LICENSE-2.0
6%%
7%% Unless required by applicable law or agreed to in writing, software
8%% distributed under the License is distributed on an "AS IS" BASIS,
9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10%% See the License for the specific language governing permissions and
11%% limitations under the License.
12%%
13%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
14%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
15%% AB. All Rights Reserved.''
16%%
17%%     $Id: beam_block.erl,v 1.1 2008/12/17 09:53:41 mikpe Exp $
18%%
19%% Purpose : Partitions assembly instructions into basic blocks and
20%% optimizes them.
21
22-module(beam_block).
23
24-export([module/2]).
25-export([live_at_entry/1]).			%Used by beam_type, beam_bool.
26-export([is_killed/2]).				%Used by beam_dead, beam_type, beam_bool.
27-export([is_not_used/2]).			%Used by beam_bool.
28-export([merge_blocks/2]).			%Used by beam_jump.
29-import(lists, [map/2,mapfoldr/3,reverse/1,reverse/2,foldl/3,
30		member/2,sort/1,all/2]).
31-define(MAXREG, 1024).
32
%% module(Module, Options) -> {ok,Module'}
%%  Entry point of the pass: run basic-block collection and
%%  optimization over every function of the module.  The options
%%  argument is currently ignored.
module({Mod,Exp,Attr,Fs,Lc}, _Opt) ->
    {ok,{Mod,Exp,Attr,[function(F) || F <- Fs],Lc}}.
35
%% function(Function0) -> Function
%%  Partition one function's instruction list into basic blocks and
%%  optimize the contents of the blocks.
function({function,Name,Arity,CLabel,Asm0}) ->
    Asm = blockify(Asm0),
    {function,Name,Arity,CLabel,Asm}.
42
43%% blockify(Instructions0) -> Instructions
44%%  Collect sequences of instructions to basic blocks and
45%%  optimize the contents of the blocks. Also do some simple
46%%  optimations on instructions outside the blocks.
47
%% blockify(Instructions0) -> Instructions
%%  Start the block collection with an empty (reversed) accumulator.
blockify(Is) -> blockify(Is, []).
50
%% blockify(Instructions, ReversedAcc) -> Instructions
%%  Walk the instruction stream, folding peephole patterns (some of
%%  which look at the already-emitted head of the accumulator) and
%%  gathering collectable instructions into {block,...} tuples.
blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) ->
    %% Useless instruction sequence.
    blockify(Is, Acc);
%% A bs_test_tail immediately after a constant-size bs_skip_bits can
%% absorb the skipped bits into its bit count (both branch to the
%% same failure label F).
blockify([{test,bs_test_tail,F,[Bits]}|Is],
	 [{test,bs_skip_bits,F,[{integer,I},Unit,_Flags]}|Acc]) ->
    blockify(Is, [{test,bs_test_tail,F,[Bits+I*Unit]}|Acc]);
%% Two adjacent constant-size skips with the same failure label are
%% merged into one skip (in units of 1 bit).
blockify([{test,bs_skip_bits,F,[{integer,I1},Unit1,_]}|Is],
	 [{test,bs_skip_bits,F,[{integer,I2},Unit2,Flags]}|Acc]) ->
    blockify(Is, [{test,bs_skip_bits,F,
		   [{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]);
%% An is_atom test followed by a select_val over exactly false/true
%% can be replaced by is_eq_exact + jump — but only when the previous
%% block provably left a boolean in Reg (see is_last_bool/2).
%% This clause handles the {false,true} ordering of the list.
blockify([{test,is_atom,{f,Fail},[Reg]}=I|
	  [{select_val,Reg,{f,Fail},
	    {list,[{atom,false},{f,_}=BrFalse,
		   {atom,true}=AtomTrue,{f,_}=BrTrue]}}|Is]=Is0],
	 [{block,Bl}|_]=Acc) ->
    case is_last_bool(Bl, Reg) of
	false ->
	    blockify(Is0, [I|Acc]);
	true ->
	    blockify(Is, [{jump,BrTrue},
			  {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc])
    end;
%% Same optimization for the {true,false} ordering of the list.
blockify([{test,is_atom,{f,Fail},[Reg]}=I|
	  [{select_val,Reg,{f,Fail},
	    {list,[{atom,true}=AtomTrue,{f,_}=BrTrue,
		   {atom,false},{f,_}=BrFalse]}}|Is]=Is0],
	 [{block,Bl}|_]=Acc) ->
    case is_last_bool(Bl, Reg) of
	false ->
	    blockify(Is0, [I|Acc]);
	true ->
	    blockify(Is, [{jump,BrTrue},
			  {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc])
    end;
%% Generic clause: either collect a run of bit-syntax puts and
%% constant-fold them, start a new {block,...}, or pass the
%% instruction through unchanged.
blockify([I|Is0]=IsAll, Acc) ->
    case is_bs_put(I) of
	true ->
	    {BsPuts0,Is} = collect_bs_puts(IsAll),
	    BsPuts = opt_bs_puts(BsPuts0),
	    blockify(Is, reverse(BsPuts, Acc));
	false ->
	    case collect(I) of
		error -> blockify(Is0, [I|Acc]);
		Instr when is_tuple(Instr) ->
		    {Block0,Is} = collect_block(IsAll),
		    Block = opt_block(Block0),
		    blockify(Is, [{block,Block}|Acc])
	    end
    end;
blockify([], Acc) -> reverse(Acc).
101
%% is_last_bool(BlockInstructions, Reg) -> true|false
%%  Check whether the last instruction of the block assigns Reg the
%%  result of a BIF that is guaranteed to return a boolean (a type
%%  test, a comparison operator, or a boolean operator).
is_last_bool([I,{'%live',_}], Reg) ->
    %% A trailing live annotation does not change the answer.
    is_last_bool([I], Reg);
is_last_bool([{set,[Reg],Args,{bif,Name,_}}], Reg) ->
    Arity = length(Args),
    erl_internal:new_type_test(Name, Arity)
        orelse erl_internal:comp_op(Name, Arity)
        orelse erl_internal:bool_op(Name, Arity);
is_last_bool([_|Rest], Reg) -> is_last_bool(Rest, Reg);
is_last_bool([], _Reg) -> false.
110
%% collect_block(Instructions) -> {BlockInstructions,Rest}
%%  Start collecting a basic block with an empty accumulator.
collect_block(Is) -> collect_block(Is, []).
113
%% collect_block(Instructions, Acc) -> {BlockInstructions,Rest}
%%  Greedily collect instructions that may live inside a block,
%%  stopping at the first one that cannot (see collect/1).
collect_block([{allocate_zero,Ns,R},{test_heap,Nh,R}|Is], Acc) ->
    %% Fuse stack and heap allocation into one allocate instruction.
    %% NOTE(review): the 'no_opt' tag presumably keeps this fused
    %% allocation from being rewritten again — the tag itself is not
    %% inspected elsewhere in this file; verify against later passes.
    collect_block(Is, [{allocate,R,{no_opt,Ns,Nh,[]}}|Acc]);
collect_block([I|Is]=Is0, Acc) ->
    case collect(I) of
	error -> {reverse(Acc),Is0};
	Instr -> collect_block(Is, [Instr|Acc])
    end;
collect_block([], Acc) -> {reverse(Acc),[]}.
122
%% collect(Instruction) -> BlockInstruction | error
%%  Translate one instruction to the internal block representation
%%  ({set,Dsts,Srcs,Op} or {allocate,Live,Descr}), or return 'error'
%%  for instructions that cannot go into a block.
collect({allocate_zero,Ns,Regs})    -> {allocate,Regs,{zero,Ns,0,[]}};
collect({test_heap,Need,Regs})      -> {allocate,Regs,{nozero,nostack,Need,[]}};
%% The nofail clause must stay before the generic bif clause.
collect({bif,Name,nofail,Args,Dst}) -> {set,[Dst],Args,{bif,Name}};
collect({bif,Name,Fail,Args,Dst})   -> {set,[Dst],Args,{bif,Name,Fail}};
collect({move,Src,Dst})             -> {set,[Dst],[Src],move};
collect({put_list,Hd,Tl,Dst})       -> {set,[Dst],[Hd,Tl],put_list};
collect({put_tuple,Arity,Dst})      -> {set,[Dst],[],{put_tuple,Arity}};
collect({put,Src})                  -> {set,[],[Src],put};
collect({put_string,Len,Str,Dst})   -> {set,[Dst],[],{put_string,Len,Str}};
collect({get_tuple_element,Src,Ix,Dst}) -> {set,[Dst],[Src],{get_tuple_element,Ix}};
collect({set_tuple_element,Src,Dst,Ix}) -> {set,[],[Src,Dst],{set_tuple_element,Ix}};
collect({get_list,Src,HdDst,TlDst}) -> {set,[HdDst,TlDst],[Src],get_list};
collect(remove_message)             -> {set,[],[],remove_message};
collect({'catch',Reg,Lbl})          -> {set,[Reg],[],{'catch',Lbl}};
collect({'%live',_}=Live)           -> Live;
collect(_)                          -> error.
139
%% opt_block(BlockInstructions0) -> BlockInstructions
%%  Optimize the contents of one basic block.
opt_block(Is0) ->
    %% Allocate instructions are explicitly hoisted first, so that the
    %% later move optimizations cannot disturb the calculation of
    %% live registers.
    Hoisted = find_fixpoint(fun move_allocates/1, Is0),
    Optimized = find_fixpoint(fun opt/1, Hoisted),
    share_floats(opt_alloc(Optimized)).
148
%% find_fixpoint(OptFun, Term0) -> Term
%%  Repeatedly apply OptFun until the result no longer changes.
find_fixpoint(OptFun, Is0) ->
    Is1 = OptFun(Is0),
    if
        Is1 =:= Is0 -> Is0;
        true -> find_fixpoint(OptFun, Is1)
    end.
154
%% move_allocates(BlockInstructions) -> BlockInstructions
%%  Move allocate instructions upwards past 'set' instructions
%%  (updating the live-register count as registers are born and die),
%%  and merge adjacent allocates.  Applied to a fixpoint by opt_block/1.
move_allocates([{set,_Ds,_Ss,{set_tuple_element,_}}|_]=Is) ->
    %% Never move an allocate past set_tuple_element; stop here.
    Is;
move_allocates([{set,Ds,Ss,_Op}=Set,{allocate,R,Alloc}|Is]) when is_integer(R) ->
    %% Swap the set and the allocate; the live count must be
    %% recomputed for the position before the set.
    [{allocate,live_regs(Ds, Ss, R),Alloc},Set|Is];
move_allocates([{allocate,R1,Alloc1},{allocate,R2,Alloc2}|Is]) ->
    R1 = R2,					% Assertion.
    move_allocates([{allocate,R1,combine_alloc(Alloc1, Alloc2)}|Is]);
move_allocates([I|Is]) ->
    [I|move_allocates(Is)];
move_allocates([]) -> [].
164
%% combine_alloc(Alloc1, Alloc2) -> Alloc
%%  Merge two adjacent allocation descriptors.  The second must not
%%  allocate any stack; its heap need is added to the first.  The
%%  combined allocation always zeroes the stack frame.
combine_alloc({_,Ns,HeapA,Init}, {_,nostack,HeapB,[]}) ->
    {zero,Ns,HeapA+HeapB,Init}.
167
%% merge_blocks(Block1, Block2) -> Block
%%  Merge the contents of two adjacent basic blocks (used by
%%  beam_jump).  If both start with an allocate and the second one
%%  needs no stack, the allocations are fused; dead moves across the
%%  seam are then removed by merge_blocks_1/1.
merge_blocks([{allocate,R,{Attr,Ns,Nh1,Init}}|B1],
	     [{allocate,_,{_,nostack,Nh2,[]}}|B2]) ->
    Alloc = {allocate,R,{Attr,Ns,Nh1+Nh2,Init}},
    [Alloc|merge_blocks(B1, B2)];
merge_blocks(B1, B2) ->
    %% The stop_here marker separates the two blocks so that
    %% merge_blocks_1/1 only prunes moves from the first block.
    merge_blocks_1(B1++[{set,[],[],stop_here}|B2]).
173
%% merge_blocks_1(Instructions) -> Instructions
%%  Up to the stop_here marker, drop any move whose destination is
%%  killed by the following instructions.
merge_blocks_1([{set,[],_,stop_here}|Is]) -> Is;
merge_blocks_1([{set,[Dst],_,move}=Move|Is]) ->
    case is_killed(Dst, Is) of
        true -> merge_blocks_1(Is);
        false -> [Move|merge_blocks_1(Is)]
    end;
merge_blocks_1([I|Is]) -> [I|merge_blocks_1(Is)].
181
%% opt(BlockInstructions) -> BlockInstructions
%%  Peephole optimization of the instructions inside a block.
%%  Applied to a fixpoint by opt_block/1.
opt([{set,[Dst],As,{bif,Bif,Fail}}=I1,
     {set,[Dst],[Dst],{bif,'not',Fail}}=I2|Is]) ->
    %% Get rid of the 'not' if the operation can be inverted.
    case inverse_comp_op(Bif) of
	none -> [I1,I2|opt(Is)];
	RevBif -> [{set,[Dst],As,{bif,RevBif,Fail}}|opt(Is)]
    end;
%% A move of a register to itself is a no-op.
opt([{set,[X],[X],move}|Is]) -> opt(Is);
%% Swap two element/2 calls on the same tuple so that the higher
%% index is fetched first (its bounds check covers the lower one).
%% The guards ensure the swap cannot change any result.
opt([{set,[D1],[{integer,Idx1},Reg],{bif,element,{f,0}}}=I1,
     {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,0}}}=I2|Is])
  when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg ->
    opt([I2,I1|Is]);
%% Try to shortcut later moves out of this instruction's destinations.
opt([{set,Ds0,Ss,Op}|Is0]) ->
    {Ds,Is} =  opt_moves(Ds0, Is0),
    [{set,Ds,Ss,Op}|opt(Is)];
opt([I|Is]) -> [I|opt(Is)];
opt([]) -> [].
199
%% opt_moves(Dsts, Instructions) -> {Dsts',Instructions'}
%%  For each destination register, try to eliminate a following move
%%  by writing directly into the move's target (see opt_move/2).
%%  Handles zero, one, or two destinations (the most a 'set' has).
opt_moves([], Is0) -> {[],Is0};
opt_moves([D0], Is0) ->
    {D1,Is1} = opt_move(D0, Is0),
    {[D1],Is1};
opt_moves([X0,Y0]=Ds, Is0) ->
    {X1,Is1} = opt_move(X0, Is0),
    case opt_move(Y0, Is1) of
	%% Both rewrites succeeded and the new destinations are
	%% distinct: accept both.
	{Y1,Is2} when X1 =/= Y1 -> {[X1,Y1],Is2};
	%% Keep only the first rewrite (it must not collide with the
	%% unchanged second destination).
	_Other when X1 =/= Y0 -> {[X1,Y0],Is1};
	%% No safe rewrite; keep everything as it was.
	_Other -> {Ds,Is0}
    end.
211
%% opt_move(Reg, Instructions) -> {Reg',Instructions'}
%%  Scan forward for a 'move Reg, D' whose source dies there; if
%%  found, delete the move and return D so the caller can write to D
%%  directly.  Only instructions that touch neither register may be
%%  crossed (see is_transparent/2).
opt_move(R, [{set,[D],[R],move}|Is]=Is0) ->
    case is_killed(R, Is) of
	true -> {D,Is};
	false -> {R,Is0}
    end;
opt_move(R, [I|Is0]) ->
    case is_transparent(R, I) of
	true ->
	    {D,Is1} = opt_move(R, Is0),
	    %% The new destination must also be untouched by the
	    %% instruction we skipped over, or the rewrite is invalid.
	    case is_transparent(D, I) of
		true ->  {D,[I|Is1]};
		false -> {R,[I|Is0]}
	    end;
	false -> {R,[I|Is0]}
    end;
opt_move(R, []) -> {R,[]}.
228
%% is_transparent(Reg, Instruction) -> true|false
%%  A 'set' instruction is transparent to Reg when it neither reads
%%  nor writes it.  Any other instruction is considered opaque.
is_transparent(R, {set,Ds,Ss,_Op}) ->
    not lists:member(R, Ds) andalso not lists:member(R, Ss);
is_transparent(_, _) -> false.
235
%% is_killed(Register, [Instruction]) -> true|false
%%  Determine whether a register is killed by the instruction sequence.
%%  If true is returned, it means that the register will not be
%%  referenced in ANY way (not even indirectly by an allocate instruction);
%%  i.e. it is OK to enter the instruction sequence with Register
%%  containing garbage.

is_killed({x,N}=R, [{block,Blk}|Is]) ->
    case is_killed(R, Blk) of
	true -> true;
	false ->
	    %% Before looking beyond the block, we must be
	    %% sure that the register is not referenced by
	    %% any allocate instruction in the block.
	    case lists:all(fun({allocate,Live,_}) when N < Live -> false;
			      (_) -> true
			   end, Blk) of
		true -> is_killed(R, Is);
		false -> false
	    end
    end;
is_killed(R, [{block,Blk}|Is]) ->
    case is_killed(R, Blk) of
	true -> true;
	false -> is_killed(R, Is)
    end;
is_killed(R, [{set,Ds,Ss,_Op}|Is]) ->
    %% Used as a source: not killed.  Overwritten: killed.
    case lists:member(R, Ss) of
	true -> false;
	false ->
	    case lists:member(R, Ds) of
		true -> true;
		false -> is_killed(R, Is)
	    end
    end;
is_killed(R, [{case_end,Used}|_]) -> R =/= Used;
is_killed(R, [{badmatch,Used}|_]) -> R =/= Used;
is_killed(_, [if_end|_]) -> true;
is_killed(R, [{func_info,_,_,Ar}|_]) ->
    %% Only the argument registers are live at a func_info.
    case R of
	{x,X} when X < Ar -> false;
	_ -> true
    end;
is_killed(R, [{kill,R}|_]) -> true;
is_killed(R, [{kill,_}|Is]) -> is_killed(R, Is);
is_killed(R, [{bs_init2,_,_,_,_,_,Dst}|Is]) ->
    if
	R =:= Dst -> true;
	true -> is_killed(R, Is)
    end;
is_killed(R, [{bs_put_string,_,_}|Is]) -> is_killed(R, Is);
is_killed({x,R}, [{'%live',Live}|_]) when R >= Live -> true;
%% BUGFIX: this clause used to recurse with the bare index R instead
%% of the register tuple {x,R}.  With a bare integer, clauses such as
%% the case_end/badmatch ones above could report a register as killed
%% (e.g. 0 =/= {x,0}) even though it was actually used, enabling an
%% unsafe optimization.
is_killed({x,_}=R, [{'%live',_}|Is]) -> is_killed(R, Is);
is_killed({x,R}, [{allocate,Live,_}|_]) ->
    %% Note: To be safe here, we must return either true or false,
    %% not looking further at the instructions beyond the allocate
    %% instruction.
    R >= Live;
is_killed({x,R}, [{call,Live,_}|_]) when R >= Live -> true;
is_killed({x,R}, [{call_last,Live,_,_}|_]) when R >= Live -> true;
is_killed({x,R}, [{call_only,Live,_}|_]) when R >= Live -> true;
is_killed({x,R}, [{call_ext,Live,_}|_]) when R >= Live -> true;
is_killed({x,R}, [{call_ext_last,Live,_,_}|_]) when R >= Live -> true;
is_killed({x,R}, [{call_ext_only,Live,_}|_]) when R >= Live -> true;
is_killed({x,R}, [return|_]) when R > 0 -> true;
is_killed(_, _) -> false.
302
303%% is_not_used(Register, [Instruction]) -> true|false
304%%  Determine whether a register is used by the instruction sequence.
305%%  If true is returned, it means that the register will not be
306%%  referenced directly, but it may be referenced by an allocate
307%%  instruction (meaning that it is NOT allowed to contain garbage).
308
%% See the header comment above: like is_killed/2, but the register
%% may still be referenced by an allocate instruction, so it must not
%% contain garbage — it just will not be read directly.
is_not_used(R, [{block,Blk}|Is]) ->
    case is_not_used(R, Blk) of
	true -> true;
	false -> is_not_used(R, Is)
    end;
is_not_used({x,R}=Reg, [{allocate,Live,_}|Is]) ->
    %% Unlike is_killed/2, we may continue past an allocate when the
    %% register is below the live limit.
    if
	R >= Live -> true;
	true -> is_not_used(Reg, Is)
    end;
is_not_used(R, [{set,Ds,Ss,_Op}|Is]) ->
    case member(R, Ss) of
	true -> false;
	false ->
	    case member(R, Ds) of
		true -> true;
		false -> is_not_used(R, Is)
	    end
    end;
%% For every other instruction the weaker is_killed/2 answer is safe.
is_not_used(R, Is) -> is_killed(R, Is).
329
330%% opt_alloc(Instructions) -> Instructions'
331%%  Optimises all allocate instructions.
332
opt_alloc([{allocate,R,{_,Ns,Nh,[]}}|Is]) ->
    %% Choose the cheapest equivalent allocation (see opt_alloc/4).
    %% NOTE(review): the tail goes through opt/1 here, not
    %% opt_alloc/1, so only a leading allocate is rewritten — this
    %% mirrors the behavior callers appear to depend on; confirm
    %% before changing.
    [opt_alloc(Is, Ns, Nh, R)|opt(Is)];
opt_alloc([I|Is]) -> [I|opt_alloc(Is)];
opt_alloc([]) -> [].
337
338%% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> [Instr]
339%%  Generates the optimal sequence of instructions for
340%%  allocating and initalizing the stack frame and needed heap.
341
%% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> Instr
%%  Pick the cheapest allocation form: no stack frame needs no
%%  zeroing at all; otherwise, if most y registers are initialized by
%%  the following instructions anyway, emit explicit init operations
%%  for the rest instead of zeroing the whole frame.
opt_alloc(_Is, nostack, HeapNeed, LiveRegs) ->
    {allocate,LiveRegs,{nozero,nostack,HeapNeed,[]}};
opt_alloc(Is, FrameSz, HeapNeed, LiveRegs) ->
    InitMask = init_yreg(Is, 0),
    Initialized = count_ones(InitMask),
    case Initialized*2 > FrameSz of
        true ->
            {allocate,LiveRegs,{nozero,FrameSz,HeapNeed,gen_init(FrameSz, InitMask)}};
        false ->
            {allocate,LiveRegs,{zero,FrameSz,HeapNeed,[]}}
    end.
352
%% gen_init(FrameSize, InitMask) -> [{init,{y,Y}}]
%%  Emit an init operation for every y register in the frame whose
%%  bit is NOT set in the initialized-register mask.
gen_init(Fs, Regs) -> gen_init(Fs, Regs, 0, []).

gen_init(Fs, _Regs, Fs, Acc) ->
    %% Reached the end of the frame.
    lists:reverse(Acc);
gen_init(Fs, Regs, Y, Acc) ->
    Acc1 = case Regs band 1 of
               0 -> [{init,{y,Y}}|Acc];
               1 -> Acc
           end,
    gen_init(Fs, Regs bsr 1, Y+1, Acc1).
360
361%% init_yreg(Instructions, RegSet) -> RegSetInitialized
362%%  Calculate the set of initialized y registers.
363
%% init_yreg(BlockInstructions, Mask) -> Mask'
%%  Calculate (as a bit mask) the set of y registers certainly
%%  initialized by the block, stopping at the first failing BIF.
init_yreg([{set,_,_,{bif,_,_}}|_], Mask) -> Mask;
init_yreg([{set,Dsts,_,_}|Is], Mask) -> init_yreg(Is, add_yregs(Dsts, Mask));
init_yreg(_Is, Mask) -> Mask.
367
%% Fold every destination operand into the initialized-register mask.
add_yregs(Ys, Mask) -> lists:foldl(fun add_yreg/2, Mask, Ys).
369
%% Set the bit for a y register; any other operand leaves the mask
%% unchanged.
add_yreg({y,Y}, Mask) -> Mask bor (1 bsl Y);
add_yreg(_, Mask)     -> Mask.
372
%% count_ones(Mask) -> Count
%%  Count the set bits of a non-negative integer.
count_ones(Mask) -> count_ones(Mask, 0).
count_ones(0, Count) -> Count;
count_ones(Mask, Count) ->
    count_ones(Mask bsr 1, Count + (Mask band 1)).
377
378%% live_at_entry(Is) -> NumberOfRegisters
379%%  Calculate the number of register live at the entry to the code
380%%  sequence.
381
%% A leading allocate records the live-register count directly.
live_at_entry([{block,[{allocate,R,_}|_]}|_]) ->
    R;
live_at_entry([{label,_}|Is]) ->
    live_at_entry(Is);
live_at_entry([{block,Bl}|_]) ->
    live_at_entry(Bl);
%% At a func_info, exactly the argument registers are live.
live_at_entry([{func_info,_,_,Ar}|_]) ->
    Ar;
live_at_entry(Is0) ->
    %% Otherwise walk the block backwards from a trailing '%live'
    %% annotation, undoing each instruction's effect on the live set.
    case reverse(Is0) of
	[{'%live',Regs}|Is] -> live_at_entry_1(Is, (1 bsl Regs)-1);
	_ -> unknown
    end.
395
%% Walk a reversed block, updating the live x-register set for each
%% 'set' (destinations die, sources become live), then convert the
%% final set to a register count.
live_at_entry_1([{set,Ds,Ss,_}|Is], Rset) ->
    live_at_entry_1(Is, x_live(Ss, x_dead(Ds, Rset)));
live_at_entry_1([{allocate,_,_}|Is], Rset) ->
    live_at_entry_1(Is, Rset);
live_at_entry_1([], Rset) -> live_regs_1(0, Rset).
402
403%% Calculate the new number of live registers when we move an allocate
404%% instruction upwards, passing a 'set' instruction.
405
%% Calculate the new number of live registers when we move an allocate
%% instruction upwards, passing a 'set' instruction.
live_regs(Ds, Ss, NumLive) ->
    live_regs_1(0, x_live(Ss, x_dead(Ds, (1 bsl NumLive)-1))).
409
%% Convert a live-register bit set to a register count (one past the
%% highest set bit).
live_regs_1(Count, 0) -> Count;
live_regs_1(Count, Rset) -> live_regs_1(Count+1, Rset bsr 1).
412
%% Clear the bit of every x register in the list; other operands are
%% ignored.
x_dead(Rs, Regs0) ->
    lists:foldl(fun({x,N}, Regs) -> Regs band (bnot (1 bsl N));
                   (_, Regs) -> Regs
                end, Regs0, Rs).
416
%% Set the bit of every x register in the list; other operands are
%% ignored.
x_live(Rs, Regs0) ->
    lists:foldl(fun({x,N}, Regs) -> Regs bor (1 bsl N);
                   (_, Regs) -> Regs
                end, Regs0, Rs).
420
421%%
422%% If a floating point literal occurs more than once, move it into
423%% a free register and re-use it.
424%%
425
%% share_floats(BlockInstructions) -> BlockInstructions
%%  If a floating point literal occurs more than once in the block,
%%  load it once into a previously unused x register and replace the
%%  other occurrences with that register.
share_floats([{allocate,_,_}=Alloc|Is]) ->
    %% Keep a leading allocate in front; share within the rest.
    [Alloc|share_floats(Is)];
share_floats(Is0) ->
    All = get_floats(Is0, []),
    MoreThanOnce0 =  more_than_once(sort(All), gb_sets:empty()),
    case gb_sets:is_empty(MoreThanOnce0) of
	true -> Is0;
	false ->
	    MoreThanOnce = gb_sets:to_list(MoreThanOnce0),
	    %% Use registers just above the highest one the block
	    %% touches, so no existing value is clobbered.
	    FreeX = highest_used(Is0, -1) + 1,
	    Regs0 = make_reg_map(MoreThanOnce, FreeX, []),
	    Regs = gb_trees:from_orddict(Regs0),
	    Is = map(fun({set,Ds,[{float,F}],Op}=I) ->
			     case gb_trees:lookup(F, Regs) of
				 none -> I;
				 {value,R} -> {set,Ds,[R],Op}
			     end;
			(I) -> I
		     end, Is0),
	    %% Prepend one move per shared float to initialize its
	    %% register.
	    [{set,[R],[{float,F}],move} || {F,R} <- Regs0] ++ Is
    end.
447
%% get_floats(BlockInstructions, Acc) -> [Float]
%%  Collect every float literal used as the single source of a 'set'
%%  instruction (latest occurrence first, in front of Acc).
get_floats(Is, Acc) ->
    lists:reverse([F || {set,_,[{float,F}],_} <- Is], Acc).
453
%% more_than_once(SortedList, Set) -> Set'
%%  Collect every element that occurs at least twice in a row in the
%%  (sorted) input list.
more_than_once([X,X|Xs], Dups) ->
    more_than_once(Xs, gb_sets:add(X, Dups));
more_than_once([_|Xs], Dups) ->
    more_than_once(Xs, Dups);
more_than_once([], Dups) -> Dups.
459
%% highest_used(BlockInstructions, Max) -> Max'
%%  Find the highest x register that the block reads, writes, or
%%  reports live.
highest_used([{set,Ds,Ss,_}|Rest], Max) ->
    highest_used(Rest, highest(Ds, highest(Ss, Max)));
highest_used([{'%live',Live}|Rest], Max) when Live > Max ->
    highest_used(Rest, Live);
highest_used([_|Rest], Max) ->
    highest_used(Rest, Max);
highest_used([], Max) -> Max.
467
%% Fold the highest x-register number in the operand list into the
%% running maximum; non-x operands are ignored.
highest(Rs, High0) ->
    lists:foldl(fun({x,R}, High) when R > High -> R;
                   (_, High) -> High
                end, High0, Rs).
473
%% make_reg_map(Floats, FirstFreeX, Acc) -> [{Float,{x,R}}] (sorted)
%%  Assign one free x register per shared float, stopping when the
%%  register limit is reached.
make_reg_map([F|Fs], X, Acc) when X < ?MAXREG ->
    make_reg_map(Fs, X+1, [{F,{x,X}}|Acc]);
make_reg_map(_, _, Acc) -> lists:sort(Acc).
477
478%% inverse_comp_op(Op) -> none|RevOp
479
%% inverse_comp_op(Op) -> none | RevOp
%%  Return the comparison operator that yields the negated result,
%%  or 'none' if Op is not an invertible comparison.
inverse_comp_op(Op) ->
    case Op of
        '=:=' -> '=/=';
        '=/=' -> '=:=';
        '==' -> '/=';
        '/=' -> '==';
        '>' -> '=<';
        '<' -> '>=';
        '>=' -> '<';
        '=<' -> '>';
        _ -> none
    end.
489
490%%%
491%%% Evaluation of constant bit fields.
492%%%
493
%% is_bs_put(Instruction) -> true|false
%%  Recognize the bit-syntax put instructions that the constant
%%  folder below knows how to handle.
is_bs_put({Op,_,_,_,_,_}) ->
    Op =:= bs_put_integer orelse Op =:= bs_put_float;
is_bs_put(_) -> false.
497
%% Split off the leading run of bit-syntax put instructions.
collect_bs_puts(Is) -> collect_bs_puts_1(Is, []).
500
%% collect_bs_puts_1(Instructions, ReversedAcc) -> {BsPuts,Rest}
%%  Take instructions while they are bit-syntax puts; previously
%%  accumulated (reversed) instructions are prepended in order.
collect_bs_puts_1(Is, Acc) ->
    {Puts,Rest} = lists:splitwith(fun is_bs_put/1, Is),
    {lists:reverse(Acc, Puts),Rest}.
507
%% Constant-fold a run of bit-syntax put instructions.
opt_bs_puts(Is) -> opt_bs_1(Is, []).
510
%% opt_bs_1(BsPutInstructions, ReversedAcc) -> BsPutInstructions
%%  Evaluate constant bit fields at compile time: constant floats
%%  become integer puts, byte-sized integer puts are collected into
%%  strings, wide constants are split into bytes, and units are
%%  normalized to 1.
opt_bs_1([{bs_put_float,Fail,{integer,Sz},1,Flags0,Src}=I0|Is], Acc) ->
    %% Try to evaluate a constant float to its bit pattern; the catch
    %% covers native endianness, bad sizes and non-numeric sources.
    case catch eval_put_float(Src, Sz, Flags0) of
	{'EXIT',_} ->
	    opt_bs_1(Is, [I0|Acc]);
	<<Int:Sz>> ->
	    Flags = force_big(Flags0),
	    I = {bs_put_integer,Fail,{integer,Sz},1,Flags,{integer,Int}},
	    %% Re-process I so the integer clauses below can fold it.
	    opt_bs_1([I|Is], Acc)
    end;
%% One constant byte: absorb it (and following bytes) into a
%% bs_put_string in the accumulator.
opt_bs_1([{bs_put_integer,_,{integer,8},1,_,{integer,_}}|_]=IsAll, Acc0) ->
    {Is,Acc} = bs_collect_string(IsAll, Acc0),
    opt_bs_1(Is, Acc);
%% A constant wider than one byte may be split into byte-sized puts
%% (big endian directly; little endian via its big-endian bit
%% pattern; native endian cannot be decided at compile time).
opt_bs_1([{bs_put_integer,Fail,{integer,Sz},1,F,{integer,N}}=I|Is0], Acc) when Sz > 8 ->
    case field_endian(F) of
	big ->
	    case bs_split_int(N, Sz, Fail, Is0) of
		no_split -> opt_bs_1(Is0, [I|Acc]);
		Is -> opt_bs_1(Is, Acc)
	    end;
	little ->
	    case catch <<N:Sz/little>> of
		{'EXIT',_} ->
		    opt_bs_1(Is0, [I|Acc]);
		<<Int:Sz>> ->
		    Flags = force_big(F),
		    Is = [{bs_put_integer,Fail,{integer,Sz},1,
			   Flags,{integer,Int}}|Is0],
		    opt_bs_1(Is, Acc)
	    end;
	native -> opt_bs_1(Is0, [I|Acc])
    end;
%% Normalize any constant-size put to unit 1.
opt_bs_1([{Op,Fail,{integer,Sz},U,F,Src}|Is], Acc) when U > 1 ->
    opt_bs_1([{Op,Fail,{integer,U*Sz},1,F,Src}|Is], Acc);
opt_bs_1([I|Is], Acc) ->
    opt_bs_1(Is, [I|Acc]);
opt_bs_1([], Acc) -> reverse(Acc).
547
%% eval_put_float(Src, Sz, Flags) -> binary()
%%  Evaluate a constant float construction to its Sz-bit
%%  representation.  Fails (and is caught by the caller) for the
%%  native endian flag, for non-numeric sources, and for sizes a
%%  float cannot be encoded in.
eval_put_float(Src, Sz, Flags) ->
    Val = value(Src),
    case field_endian(Flags) of
	little -> <<Val:Sz/little-float-unit:1>>;
	big -> <<Val:Sz/big-float-unit:1>>
        %% native intentionally not handled here - we can't optimize it.
    end.
555
%% Extract the bare term from a tagged literal operand.
value({integer,Int}) -> Int;
value({float,Flt}) -> Flt;
value({atom,Atom}) -> Atom.
559
%% If the accumulator already ends with a bs_put_string, extend that
%% string; otherwise start collecting a fresh one.
bs_collect_string(Is, [{bs_put_string,Len,{string,Str}}|Acc]) ->
    bs_coll_str_1(Is, Len, lists:reverse(Str), Acc);
bs_collect_string(Is, Acc) ->
    bs_coll_str_1(Is, 0, [], Acc).
564
%% Collect whole bytes (U*Sz =:= 8 bits) from constant bs_put_integer
%% instructions into a reversed string accumulator, then emit one
%% bs_put_string covering them all.
bs_coll_str_1([{bs_put_integer,_,{integer,Sz},U,_,{integer,V}}|Is],
              Len, StrAcc, IsAcc) when U*Sz =:= 8 ->
    bs_coll_str_1(Is, Len+1, [V band 16#FF|StrAcc], IsAcc);
bs_coll_str_1(Is, Len, StrAcc, IsAcc) ->
    {Is,[{bs_put_string,Len,{string,lists:reverse(StrAcc)}}|IsAcc]}.
571
%% Extract the endianness from a field_flags operand.
field_endian({field_flags,Flags}) -> field_endian_1(Flags).
573
%% Return the first endianness flag in the list; crashes if the list
%% is exhausted without finding one (same as the original clauses).
field_endian_1([Flag|Flags]) ->
    case Flag of
        big -> big;
        little -> little;
        native -> native;
        _ -> field_endian_1(Flags)
    end.
578
%% Rewrite a field_flags operand so its endianness is big.
force_big({field_flags,Flags}) ->
    {field_flags,force_big_1(Flags)}.
581
%% Replace the first endianness flag with 'big'; already-big lists
%% are returned unchanged.
force_big_1([big|_]=Flags) -> Flags;
force_big_1([little|Flags]) -> [big|Flags];
force_big_1([Flag|Flags]) -> [Flag|force_big_1(Flags)].
585
%% bs_split_int(N, Sz, Fail, Acc) -> no_split | [Instruction]
%%  Split a wide constant integer put into byte-sized puts so that
%%  the string collector can fold them into a bs_put_string.
bs_split_int(0, Sz, _, _) when Sz > 64 ->
    %% Don't split: the string would consist of only zero bytes.
    no_split;
bs_split_int(N, Sz, Fail, Acc) ->
    %% The most significant fragment carries the odd bits, if any.
    LeadSz = case Sz rem 8 of
                 0 -> 8;
                 Rem -> Rem
             end,
    bs_split_int_1(N, LeadSz, Sz, Fail, Acc).
596
%% Emit one big-endian bs_put_integer per byte, pushing the least
%% significant byte first so the most significant ends up first in
%% the resulting list.
bs_split_int_1(N, ByteSz, BitsLeft, Fail, Acc) when BitsLeft > 0 ->
    Mask = (1 bsl ByteSz) - 1,
    Put = {bs_put_integer,Fail,{integer,ByteSz},1,
           {field_flags,[big]},{integer,N band Mask}},
    bs_split_int_1(N bsr ByteSz, 8, BitsLeft-ByteSz, Fail, [Put|Acc]);
bs_split_int_1(_, _, _, _, Acc) -> Acc.
603