1%%% Licensed under the Apache License, Version 2.0 (the "License");
2%%% you may not use this file except in compliance with the License.
3%%% You may obtain a copy of the License at
4%%%
5%%%     http://www.apache.org/licenses/LICENSE-2.0
6%%%
7%%% Unless required by applicable law or agreed to in writing, software
8%%% distributed under the License is distributed on an "AS IS" BASIS,
9%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10%%% See the License for the specific language governing permissions and
11%%% limitations under the License.
12%%%
13%%% Copyright (C) 2000-2004 Mikael Pettersson
14%%% Copyright (C) 2004 Daniel Luna
15%%%
16%%% This is the syntax of amd64 r/m operands:
17%%%
18%%% opnd  ::= reg			mod == 11
19%%%	    | MEM[ea]			mod != 11
20%%%
21%%% ea    ::= disp32(reg)		mod == 10, r/m != ESP
22%%%  	    | disp32 sib12		mod == 10, r/m == 100
23%%%	    | disp8(reg)		mod == 01, r/m != ESP
24%%%	    | disp8 sib12		mod == 01, r/m == 100
25%%%	    | (reg)			mod == 00, r/m != ESP and EBP
26%%%	    | sib0			mod == 00, r/m == 100
27%%%	    | disp32(%rip)		mod == 00, r/m == 101
28%%%
29%%% // sib0: mod == 00
30%%% sib0  ::= disp32(,index,scale)	base == EBP, index != ESP
31%%%	    | disp32			base == EBP, index == 100
32%%%	    | (base,index,scale)	base != EBP, index != ESP
33%%%	    | (base)			base != EBP, index == 100
34%%%
35%%% // sib12: mod == 01 or 10
36%%% sib12  ::= (base,index,scale)	index != ESP
37%%%	    | (base)			index == 100
38%%%
39%%% scale ::= 00 | 01 | 10 | 11		index << scale
40%%%
41%%% Notes:
42%%%
43%%% 1. ESP cannot be used as index register.
44%%% 2. Use of ESP as base register requires a SIB byte.
%%% 3. disp(reg), when reg != ESP, can be represented either without
%%%    a SIB byte [r/m == reg] or with one [r/m == 100, base == reg].
%%% 4. disp32 can be represented either without a SIB byte
%%%    [mod == 00, r/m == 101] or with one
%%%    [mod == 00, r/m == 100, base == 101, index == 100].
50%%% 5. AMD64 and x86 interpret mod==00b r/m==101b EAs differently:
51%%%    on x86 the disp32 is an absolute address, but on AMD64 the
52%%%    disp32 is relative to the %rip of the next instruction.
53
54-module(hipe_amd64_encode).
55
56-export([% condition codes
57	 cc/1,
58	 % 8-bit registers
59	 %% al/0, cl/0, dl/0, bl/0,
60	 % 32-bit registers
61	 %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0,
62	 % operands
63	 sindex/2, sib/1, sib/2,
64	 ea_disp32_base/2, ea_disp32_sib/2,
65	 ea_disp8_base/2, ea_disp8_sib/2,
66	 ea_base/1,
67	 ea_disp32_sindex/1, %%ea_disp32_sindex/2,
68	 ea_sib/1, %ea_disp32_rip/1,
69	 rm_reg/1, rm_mem/1,
70	 % instructions
71	 insn_encode/3, insn_sizeof/2]).
72
73%%-define(DO_HIPE_AMD64_ENCODE_TEST,true).
74-ifdef(DO_HIPE_AMD64_ENCODE_TEST).
75-export([dotest/0, dotest/1]).	% for testing, don't use
76-endif.
77
%% ?ASSERT(F, G) evaluates to [] when G holds; otherwise it exits the
%% calling process with reason {?MODULE, F}. G is used as the test of
%% an `if' clause, so it must be a valid guard expression.
-define(ASSERT(F,G), if G -> [] ; true -> exit({?MODULE,F}) end).
79%-define(ASSERT(F,G), []).
80
81%%% condition codes
82
%% 4-bit condition-code numbers, as returned by cc/1.
-define(CC_O,  2#0000).	% overflow
-define(CC_NO, 2#0001).	% no overflow
-define(CC_B,  2#0010).	% below, <u
-define(CC_AE, 2#0011).	% above or equal, >=u
-define(CC_E,  2#0100).	% equal
-define(CC_NE, 2#0101).	% not equal
-define(CC_BE, 2#0110).	% below or equal, <=u
-define(CC_A,  2#0111).	% above, >u
-define(CC_S,  2#1000).	% sign, - (sign flag set: negative)
-define(CC_NS, 2#1001).	% not sign, + (sign flag clear: non-negative)
-define(CC_PE, 2#1010).	% parity even
-define(CC_PO, 2#1011).	% parity odd
-define(CC_L,  2#1100).	% less than, <s
-define(CC_GE, 2#1101).	% greater or equal, >=s
-define(CC_LE, 2#1110).	% less or equal, <=s
-define(CC_G,  2#1111).	% greater than, >s
99
%% Translate a condition-code mnemonic (an atom) into its 4-bit number.
%% Any other argument fails with function_clause.
-spec cc(atom()) -> 0..15.
%% unsigned comparisons
cc(a)  -> ?CC_A;
cc(ae) -> ?CC_AE;
cc(b)  -> ?CC_B;
cc(be) -> ?CC_BE;
%% equality
cc(e)  -> ?CC_E;
cc(ne) -> ?CC_NE;
%% signed comparisons
cc(g)  -> ?CC_G;
cc(ge) -> ?CC_GE;
cc(l)  -> ?CC_L;
cc(le) -> ?CC_LE;
%% individual flags
cc(o)  -> ?CC_O;
cc(no) -> ?CC_NO;
cc(s)  -> ?CC_S;
cc(ns) -> ?CC_NS;
cc(pe) -> ?CC_PE;
cc(po) -> ?CC_PO.
116
117%%% 8-bit registers
118
%% 3-bit register numbers of the 8-bit registers.
-define(AL, 2#000).
-define(CL, 2#001).
-define(DL, 2#010).
-define(BL, 2#011).
-define(SPL, 2#100).
-define(BPL, 2#101).
-define(SIL, 2#110).
-define(DIL, 2#111).
127
128%% al() -> ?AL.
129%% cl() -> ?CL.
130%% dl() -> ?DL.
131%% bl() -> ?BL.
132
133%%% 32-bit registers
134
%% 3-bit register numbers of the 32-bit registers.
%% ?ESP and ?EBP are the values the operand constructors below check
%% against, because those r/m and base encodings have special meanings.
-define(EAX, 2#000).
-define(ECX, 2#001).
-define(EDX, 2#010).
-define(EBX, 2#011).
-define(ESP, 2#100).
-define(EBP, 2#101).
-define(ESI, 2#110).
-define(EDI, 2#111).
143
144%% eax() -> ?EAX.
145%% ecx() -> ?ECX.
146%% edx() -> ?EDX.
147%% ebx() -> ?EBX.
148%% esp() -> ?ESP.
149%% ebp() -> ?EBP.
150%% esi() -> ?ESI.
151%% edi() -> ?EDI.
152
153%%% r/m operands
154
%% Build a scaled-index descriptor {sindex, Scale, Index}.
%% Scale must lie in 0..3 and Index must differ from ?ESP; if any of
%% these checks fails the process exits with {?MODULE, sindex}.
sindex(Scale, Index) when is_integer(Scale), is_integer(Index) ->
    ?ASSERT(sindex, Scale >= 0 andalso Scale =< 3 andalso Index =/= ?ESP),
    {sindex, Scale, Index}.
160
%% SIB operand descriptor: an optional scaled index (`none' or a
%% {sindex, Scale, Index} tuple) plus an integer base register number.
-record(sib, {sindex_opt, base :: integer()}).

%% sib/1 builds a SIB descriptor without an index.
sib(Base) when is_integer(Base) ->
    sib(Base, none).

%% sib/2 builds a SIB descriptor with the given scaled index.
sib(Base, Sindex) when is_integer(Base) ->
    #sib{base = Base, sindex_opt = Sindex}.
164
%% Build the effective-address descriptor for disp32(Base), which
%% enc_ea/3 encodes as ModRM mod=10, r/m=Base with no SIB byte.
%% Only the low three bits of Base reach the r/m field, so every
%% register whose low bits equal ?ESP (2#100; register 4 and, on AMD64,
%% register 12) would select a SIB byte instead. Such a base is rejected
%% by exiting with {?MODULE, ea_disp32_base}; use ea_disp32_sib/2 for it.
ea_disp32_base(Disp32, Base) when is_integer(Base) ->
    ?ASSERT(ea_disp32_base, (Base band 2#111) =/= ?ESP),
    {ea_disp32_base, Disp32, Base}.
%% Build the effective-address descriptor for a 32-bit displacement
%% combined with a SIB operand (see sib/1 and sib/2).
ea_disp32_sib(Disp, Sib) ->
    {ea_disp32_sib, Disp, Sib}.
%% Build the effective-address descriptor for disp8(Base), which
%% enc_ea/3 encodes as ModRM mod=01, r/m=Base with no SIB byte.
%% Only the low three bits of Base reach the r/m field, so every
%% register whose low bits equal ?ESP (2#100; register 4 and, on AMD64,
%% register 12) would select a SIB byte instead. Such a base is rejected
%% by exiting with {?MODULE, ea_disp8_base}; use ea_disp8_sib/2 for it.
ea_disp8_base(Disp8, Base) when is_integer(Base) ->
    ?ASSERT(ea_disp8_base, (Base band 2#111) =/= ?ESP),
    {ea_disp8_base, Disp8, Base}.
%% Build the effective-address descriptor for an 8-bit displacement
%% combined with a SIB operand (see sib/1 and sib/2).
ea_disp8_sib(Disp, Sib) ->
    {ea_disp8_sib, Disp, Sib}.
%% Build the effective-address descriptor for (Base), which enc_ea/3
%% encodes as ModRM mod=00, r/m=Base with neither displacement nor SIB.
%% Only the low three bits of Base reach the r/m field, and with
%% mod=00 two r/m values are special: 2#100 (?ESP) announces a SIB byte
%% and 2#101 (?EBP) selects disp32(%rip). Registers sharing those low
%% bits (4 and 12, 5 and 13) are therefore rejected by exiting with
%% {?MODULE, ea_base}; use ea_sib/1 or ea_disp8_base/2 for them.
ea_base(Base) when is_integer(Base) ->
    ?ASSERT(ea_base, (Base band 2#111) =/= ?ESP),
    ?ASSERT(ea_base, (Base band 2#111) =/= ?EBP),
    {ea_base, Base}.
%% Build the effective-address descriptor for a bare 32-bit
%% displacement with no base and no index (the index slot is `none').
ea_disp32_sindex(Disp) ->
    {ea_disp32_sindex, Disp, none}.
178%% ea_disp32_sindex(Disp32, Sindex) -> {ea_disp32_sindex, Disp32, Sindex}.
%% Build the effective-address descriptor for a SIB operand without
%% displacement, which enc_ea/3 encodes as ModRM mod=00, r/m=100 + SIB.
%% With mod=00 a SIB base field of 2#101 means "no base, disp32
%% follows", and only the low three bits of the base register reach
%% that field. Every base whose low bits equal ?EBP (register 5 and, on
%% AMD64, register 13) is therefore rejected by exiting with
%% {?MODULE, ea_sib}; use ea_disp8_sib/2 with a zero displacement.
ea_sib(SIB) ->
    ?ASSERT(ea_sib, (SIB#sib.base band 2#111) =/= ?EBP),
    {ea_sib, SIB}.
182%ea_disp32_rip(Disp32) -> {ea_disp32_rip, Disp32}.
183
%% Wrap a register number as an r/m operand.
rm_reg(RegNo) ->
    {rm_reg, RegNo}.

%% Wrap an effective-address descriptor as an r/m operand.
rm_mem(EffAddr) ->
    {rm_mem, EffAddr}.
186
%% Build a ModRM byte paired with the REX bits its registers require.
%% Returns {{rex, Bits}, Byte}: Byte packs Mod into bits 7..6 and the low
%% three bits of RO and RM into bits 5..3 and 2..0; the rex part records
%% REX.R for RO and REX.B for RM and is merged later by fix_rex/3.
mk_modrm(Mod, RO, RM) ->
    Rex = rex([{r, RO}, {b, RM}]),
    RegField = (RO band 2#111) bsl 3,
    RmField = RM band 2#111,
    {Rex, (Mod bsl 6) bor RegField bor RmField}.
190
%% Build a SIB byte paired with the REX bits its registers require.
%% Returns {{rex, Bits}, Byte}: Byte packs Scale into bits 7..6 and the
%% low three bits of Index and Base into bits 5..3 and 2..0; the rex part
%% records REX.X for Index and REX.B for Base.
mk_sib(Scale, Index, Base) ->
    Rex = rex([{x, Index}, {b, Base}]),
    IndexField = (Index band 2#111) bsl 3,
    BaseField = Base band 2#111,
    {Rex, (Scale bsl 6) bor IndexField bor BaseField}.
194
%% Compute a REX marker {rex, Bits} from a list of requests:
%%   {w, 0|1}    - REX.W, bit 3 (64-bit operand size)
%%   {r, Reg}    - REX.R, bit 2, set when Reg > 7 (ModRM reg field)
%%   {x, Reg}    - REX.X, bit 1, set when Reg > 7 (SIB index)
%%   {b, Reg}    - REX.B, bit 0, set when Reg > 7 (ModRM r/m, SIB base
%%                 or opcode register)
%%   {r8, Opnd}  - 8-bit register operand; registers 4..7 set bit 8,
%%                 which forces fix_rex/3 to emit a REX prefix even when
%%                 no other bit is set (bit 8 itself is masked off there)
%% The marker is merged into the final prefix byte by fix_rex/3.
rex(Requests) ->
    {rex, rex_(Requests)}.

rex_([]) ->
    0;
rex_([{r8, Opnd} | Rest]) ->             % 8 bit registers
    case Opnd of
        {rm_mem, _} ->
            rex_(Rest);
        {rm_reg, Reg} ->
            %% unwrap and look at the register number itself
            rex_([{r8, Reg} | Rest]);
        N when N =:= 4; N =:= 5; N =:= 6; N =:= 7 ->
            16#100 bor rex_(Rest);
        N when is_integer(N) ->
            rex_(Rest)
    end;
rex_([{w, W} | Rest]) ->                 % 64-bit mode
    (W bsl 3) bor rex_(Rest);
rex_([{r, Reg} | Rest]) when is_integer(Reg) ->
    (rex_ext_bit(Reg) bsl 2) bor rex_(Rest);
rex_([{x, Reg} | Rest]) when is_integer(Reg) ->
    (rex_ext_bit(Reg) bsl 1) bor rex_(Rest);
rex_([{b, Reg} | Rest]) when is_integer(Reg) ->
    %% ModRM r/m, SIB base or opcode reg
    rex_ext_bit(Reg) bor rex_(Rest).

%% 1 when the register number needs a REX extension bit, else 0.
rex_ext_bit(Reg) when Reg > 7 -> 1;
rex_ext_bit(_) -> 0.
225
%% Prepend the two low-order bytes of Word to Tail, least significant
%% byte first.
le16(Word, Tail) ->
    Lo = Word band 16#FF,
    Hi = (Word bsr 8) band 16#FF,
    [Lo, Hi | Tail].
228
%% Prepend a 32-bit little-endian datum to Tail.
%% An integer contributes its four low-order bytes, least significant
%% first. A {Tag, Val} pair is a relocatable datum: it is kept as a
%% single {le32, Tag, Val} placeholder, expanded later by fix_relocs/4.
le32(Word, Tail) when is_integer(Word) ->
    [(Word bsr Shift) band 16#FF || Shift <- [0, 8, 16, 24]] ++ Tail;
le32({Tag, Val}, Tail) ->
    [{le32, Tag, Val} | Tail].
234
%% Prepend a 64-bit little-endian datum to Tail.
%% An integer contributes its eight low-order bytes, least significant
%% first. A {Tag, Val} pair is a relocatable datum: it is kept as a
%% single {le64, Tag, Val} placeholder, expanded later by fix_relocs/4.
le64(Word, Tail) when is_integer(Word) ->
    [(Word bsr Shift) band 16#FF
     || Shift <- [0, 8, 16, 24, 32, 40, 48, 56]] ++ Tail;
le64({Tag, Val}, Tail) ->
    [{le64, Tag, Val} | Tail].
242
%% Turn an optional scaled index into the {Scale, Index} field values
%% for a SIB byte. `none' is encoded as scale 00 with index 100, the
%% "no index" encoding listed in the grammar at the top of this file.
enc_sindex_opt(none) ->
    {2#00, 2#100};
enc_sindex_opt({sindex, Scale, Index}) ->
    {Scale, Index}.
245
%% Encode a #sib{} descriptor into a {RexMarker, SibByte} pair via
%% mk_sib/3.
enc_sib(Sib = #sib{}) ->
    {Scale, Index} = enc_sindex_opt(Sib#sib.sindex_opt),
    mk_sib(Scale, Index, Sib#sib.base).
249
%% Encode an effective-address descriptor in front of Tail.
%% RO is the value for the ModRM reg field (a register number or an
%% opcode extension). The result starts with the ModRM element, then an
%% optional SIB element, then the displacement (if any), then Tail.
%% ModRM and SIB elements are {RexMarker, Byte} pairs, see mk_modrm/3.
enc_ea(EA, RO, Tail) ->
    case EA of
        %% mod == 00: no displacement, or a disp32 with a special r/m or base
        {ea_base, Base} ->
            [mk_modrm(2#00, RO, Base) | Tail];
        {ea_sib, Sib} ->
            ModRM = mk_modrm(2#00, RO, 2#100),
            [ModRM, enc_sib(Sib) | Tail];
        {ea_disp32_sindex, Disp, SindexOpt} ->
            {Scale, Index} = enc_sindex_opt(SindexOpt),
            SibElem = mk_sib(Scale, Index, 2#101),
            ModRM = mk_modrm(2#00, RO, 2#100),
            [ModRM, SibElem | le32(Disp, Tail)];
        {ea_disp32_rip, Disp} ->
            ModRM = mk_modrm(2#00, RO, 2#101),
            [ModRM | le32(Disp, Tail)];
        %% mod == 01: 8-bit displacement
        {ea_disp8_base, Disp, Base} ->
            [mk_modrm(2#01, RO, Base), Disp | Tail];
        {ea_disp8_sib, Disp, Sib} ->
            ModRM = mk_modrm(2#01, RO, 2#100),
            [ModRM, enc_sib(Sib), Disp | Tail];
        %% mod == 10: 32-bit displacement
        {ea_disp32_base, Disp, Base} ->
            ModRM = mk_modrm(2#10, RO, Base),
            [ModRM | le32(Disp, Tail)];
        {ea_disp32_sib, Disp, Sib} ->
            ModRM = mk_modrm(2#10, RO, 2#100),
            [ModRM, enc_sib(Sib) | le32(Disp, Tail)]
    end.
272
%% Encode an r/m operand in front of Tail. A register operand becomes a
%% single mod == 11 ModRM element; a memory operand is handed to
%% enc_ea/3. RO supplies the ModRM reg field in both cases.
encode_rm(Operand, RO, Tail) ->
    case Operand of
        {rm_mem, EffAddr} ->
            enc_ea(EffAddr, RO, Tail);
        {rm_reg, RegNo} ->
            [mk_modrm(2#11, RO, RegNo) | Tail]
    end.
278
279%% sizeof_ea(EA) ->
280%%     case element(1, EA) of
281%% 	ea_disp32_base -> 5;
282%% 	ea_disp32_sib -> 6;
283%% 	ea_disp8_base -> 2;
284%% 	ea_disp8_sib -> 3;
285%% 	ea_base -> 1;
286%% 	ea_disp32_sindex -> 6;
287%% 	ea_sib -> 2;
288%% 	ea_disp32_rip -> 5
289%%     end.
290
291%% sizeof_rm(RM) ->
292%%    case RM of
293%%	{rm_reg, _} -> 1;
294%%	{rm_mem, EA} -> sizeof_ea(EA)
295%%    end.
296
%%% x87 stack positions
298
%% 3-bit encodings of the x87 stack positions 0..7, as returned by st/1.
-define(ST0, 2#000).
-define(ST1, 2#001).
-define(ST2, 2#010).
-define(ST3, 2#011).
-define(ST4, 2#100).
-define(ST5, 2#101).
-define(ST6, 2#110).
-define(ST7, 2#111).
307
%% Map an x87 stack position (an integer 0..7) to its 3-bit encoding.
%% ?ST0..?ST7 are numerically equal to the positions they name, so the
%% encoding is the position itself. Anything else fails with
%% function_clause.
st(Pos) when is_integer(Pos), Pos >= 0, Pos =< 7 ->
    Pos.
316
317
318%%% Instructions
319%%%
320%%% Insn	::= {Op,Opnds}
321%%% Opnds	::= {Opnd1,...,Opndn}	(n >= 0)
322%%% Opnd	::= eax | ax | al | 1 | cl
323%%%		  | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8}
324%%%		  | {rm32,RM32} | {rm16,RM16} | {rm8,RM8}
325%%%		  | {rel32,Rel32} | {rel8,Rel8}
326%%%		  | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8}
327%%%		  | {cc,CC}
328%%%		  | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8}
329%%%		  | {ea,EA}
330
%% Prefix byte emitted in front of the 16-bit operand forms by the
%% encoders in this module.
-define(PFX_OPND_16BITS, 16#66).
332
%% Encode a two-operand integer arithmetic instruction
%% (add, or, adc, sbb, and, sub, xor, cmp).
%% SubOpcode is the 3-bit operation selector: it is shifted into bits
%% 3..5 of the opcode for the accumulator and register forms, and used
%% as the ModRM reg field for the immediate forms.
%% Each 64-bit form is its 32-bit counterpart preceded by a REX.W marker.
arith_binop_encode(SubOpcode, Opnds) ->
    case Opnds of
        %% amd64 forms: REX.W marker followed by the 32-bit encoding
        {rax, {imm32,_} = Imm} ->
            [rex([{w,1}]) | arith_binop_encode(SubOpcode, {eax, Imm})];
        {{rm64,RM}, {imm32,_} = Imm} ->
            [rex([{w,1}])
             | arith_binop_encode(SubOpcode, {{rm32,RM}, Imm})];
        {{rm64,RM}, {imm8,_} = Imm} ->
            [rex([{w,1}])
             | arith_binop_encode(SubOpcode, {{rm32,RM}, Imm})];
        {{rm64,RM}, {reg64,Reg}} ->
            [rex([{w,1}])
             | arith_binop_encode(SubOpcode, {{rm32,RM}, {reg32,Reg}})];
        {{reg64,Reg}, {rm64,RM}} ->
            [rex([{w,1}])
             | arith_binop_encode(SubOpcode, {{reg32,Reg}, {rm32,RM}})];
        %% 32-bit forms
        {eax, {imm32,Imm}} ->
            Opcode = 16#05 bor (SubOpcode bsl 3),
            [Opcode | le32(Imm, [])];
        {{rm32,RM}, {imm32,Imm}} ->
            ImmBytes = le32(Imm, []),
            [16#81 | encode_rm(RM, SubOpcode, ImmBytes)];
        {{rm32,RM}, {imm8,Imm}} ->
            [16#83 | encode_rm(RM, SubOpcode, [Imm])];
        {{rm32,RM}, {reg32,Reg}} ->
            Opcode = 16#01 bor (SubOpcode bsl 3),
            [Opcode | encode_rm(RM, Reg, [])];
        {{reg32,Reg}, {rm32,RM}} ->
            Opcode = 16#03 bor (SubOpcode bsl 3),
            [Opcode | encode_rm(RM, Reg, [])]
    end.
361
%% Encode an SSE2 arithmetic instruction with an xmm destination and a
%% 64-bit floating-point r/m source
%% (addpd, cmpsd, divsd, maxsd, minsd, mulsd, sqrtsd, subsd).
%% Layout: Prefix, 16#0F, Opcode, then the ModRM encoding.
sse2_arith_binop_encode(Prefix, Opcode, {{xmm, Xmm}, {rm64fp, RM}}) ->
    Operand = encode_rm(RM, Xmm, []),
    [Prefix, 16#0F, Opcode | Operand].
365
%% Encode cvtsi2sd with an xmm destination and a 64-bit integer r/m
%% source: REX.W marker, F2 0F 2A, then the ModRM encoding.
%% The marker is listed first; fix_rex/3 places the real REX byte after
%% the 16#F2 prefix.
sse2_cvtsi2sd_encode({{xmm, Xmm}, {rm64, RM}}) ->
    RexW = rex([{w, 1}]),
    Operand = encode_rm(RM, Xmm, []),
    [RexW, 16#F2, 16#0F, 16#2A | Operand].
368
%% Encode movsd between an xmm register and a 64-bit floating-point r/m
%% operand: F2 0F 10 when the xmm register is the destination,
%% F2 0F 11 when it is the source.
%% (cvtsi2sd is handled separately by sse2_cvtsi2sd_encode/1.)
sse2_mov_encode(Opnds) ->
    {Opcode, RM, Xmm} =
        case Opnds of
            {{xmm, X}, {rm64fp, R}} -> {16#10, R, X};   % movsd xmm <- r/m
            {{rm64fp, R}, {xmm, X}} -> {16#11, R, X}    % movsd r/m <- xmm
        end,
    [16#F2, 16#0F, Opcode | encode_rm(RM, Xmm, [])].
378
379%% arith_binop_sizeof(Opnds) ->
380%%     %% add, or, adc, sbb, and, sub, xor, cmp
381%%    case Opnds of
382%%	{eax, {imm32,_}} ->
383%%	    1 + 4;
384%%	{{rm32,RM32}, {imm32,_}} ->
385%%	    1 + sizeof_rm(RM32) + 4;
386%%	{{rm32,RM32}, {imm8,_}} ->
387%%	    1 + sizeof_rm(RM32) + 1;
388%%	{{rm32,RM32}, {reg32,_}} ->
389%%	    1 + sizeof_rm(RM32);
390%%	{{reg32,_}, {rm32,RM32}} ->
391%%	    1 + sizeof_rm(RM32)
392%%    end.
393
%% Encode a bit-scan instruction (bsf, bsr): 16#0F, Opcode, then the
%% ModRM encoding with the destination register in the reg field.
bs_op_encode(Opcode, {{reg32,Dst}, {rm32,Src}}) ->
    Operand = encode_rm(Src, Dst, []),
    [16#0F, Opcode | Operand].
396
397%% bs_op_sizeof({{reg32,_}, {rm32,RM32}}) ->		% bsf, bsr
398%%    2 + sizeof_rm(RM32).
399
%% Encode bswap on a 32-bit or 64-bit register: 0F C8+r, where r is the
%% low three bits of the register number. The REX marker carries REX.B
%% for the register and, for the 64-bit form, REX.W as well.
bswap_encode(Opnds) ->
    {RexRequests, Reg} =
        case Opnds of
            {{reg32,R}} -> {[{b, R}], R};
            {{reg64,R}} -> {[{w, 1}, {b, R}], R}
        end,
    [rex(RexRequests), 16#0F, 16#C8 bor (Reg band 2#111)].
407
408%% bswap_sizeof({{reg32,_}}) ->
409%%    2.
410
%% Encode a bit-test instruction (bt, btc, btr, bts).
%% With a register bit index, SubOpcode is shifted into bits 3..5 of the
%% second opcode byte; with an immediate bit index the opcode is 0F BA
%% and SubOpcode goes into the ModRM reg field.
bt_op_encode(SubOpcode, Opnds) ->
    case Opnds of
        {{rm32,RM}, {imm8,BitNo}} ->
            [16#0F, 16#BA | encode_rm(RM, SubOpcode, [BitNo])];
        {{rm32,RM}, {reg32,BitReg}} ->
            Opcode = 16#A3 bor (SubOpcode bsl 3),
            [16#0F, Opcode | encode_rm(RM, BitReg, [])]
    end.
418
419%% bt_op_sizeof(Opnds) ->			% bt, btc, btr, bts
420%%    case Opnds of
421%%	{{rm32,RM32}, {reg32,_}} ->
422%%	    2 + sizeof_rm(RM32);
423%%	{{rm32,RM32}, {imm8,_}} ->
424%%	    2 + sizeof_rm(RM32) + 1
425%%    end.
426
%% Encode a call: E8 + rel32 for a relative target, or FF /2 for an
%% indirect target given as an r/m operand. The indirect form needs no
%% REX.W marker since it defaults to 64 bits on amd64; there is no
%% rm32 form.
call_encode(Opnds) ->
    case Opnds of
        {{rm64,Target}} ->
            [16#FF | encode_rm(Target, 2#010, [])];
        {{rel32,Rel}} ->
            [16#E8 | le32(Rel, [])]
    end.
436
437%% call_sizeof(Opnds) ->
438%%    case Opnds of
439%%	{{rel32,_}} ->
440%%	    1 + 4;
441%%	{{rm32,RM32}} ->
442%%	    1 + sizeof_rm(RM32)
443%%    end.
444
%% Encode cbw: the 16-bit operand-size prefix followed by opcode 16#98.
%% Takes the empty operand tuple.
cbw_encode({}) ->
    Opcode = 16#98,
    [?PFX_OPND_16BITS, Opcode].
447
%% Size in bytes of the cbw encoding: the prefix byte plus the opcode
%% byte produced by cbw_encode/1.
cbw_sizeof({}) ->
    2.
450
%% Encode an operand-less single-byte instruction
%% (cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std).
%% The second argument must be the empty operand tuple.
nullary_op_encode(Opcode, Opnds) when Opnds =:= {} ->
    [Opcode].
454
%% Size in bytes of an operand-less single-byte instruction, matching
%% the one-element list produced by nullary_op_encode/2.
nullary_op_sizeof({}) ->
    %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std
    1.
458
%% Encode a conditional move: 0F 40+CC, then the ModRM encoding with
%% the destination register in the reg field. CC is a 4-bit
%% condition-code number as produced by cc/1.
cmovcc_encode({{cc,CC}, {reg32,Dst}, {rm32,Src}}) ->
    Opcode = 16#40 bor CC,
    [16#0F, Opcode | encode_rm(Src, Dst, [])].
461
462%% cmovcc_sizeof({{cc,_}, {reg32,_}, {rm32,RM32}}) ->
463%%    2 + sizeof_rm(RM32).
464
%% Encode inc or dec on an r/m operand: opcode FF with SubOpcode
%% (either 0 or 1) in the ModRM reg field. The 64-bit form is the
%% 32-bit encoding preceded by a REX.W marker.
incdec_encode(SubOpcode, Opnds) ->
    case Opnds of
        {{rm64,RM}} ->
            [rex([{w, 1}]) | incdec_encode(SubOpcode, {{rm32,RM}})];
        {{rm32,RM}} ->
            [16#FF | encode_rm(RM, SubOpcode, [])]
    end.
472
473%% incdec_sizeof(Opnds) ->
474%%    case Opnds of
475%%	{{rm32,RM32}} ->
476%%	    1 + sizeof_rm(RM32);
477%%	{{reg32,_}} ->
478%%	    1
479%%    end.
480
%% Encode a one-operand arithmetic instruction (div, idiv, mul, neg,
%% not): opcode F7 with Opcode in the ModRM reg field. The 64-bit form
%% is the 32-bit encoding preceded by a REX.W marker.
arith_unop_encode(Opcode, Opnds) ->
    case Opnds of
        {{rm64,RM}} ->
            [rex([{w,1}]) | arith_unop_encode(Opcode, {{rm32,RM}})];
        {{rm32,RM}} ->
            [16#F7 | encode_rm(RM, Opcode, [])]
    end.
488
489%% arith_unop_sizeof({{rm32,RM32}}) ->	% div, idiv, mul, neg, not
490%%    1 + sizeof_rm(RM32).
491
%% Encode enter: opcode C8, the 16-bit immediate in little-endian
%% order, then the 8-bit immediate.
enter_encode({{imm16,Imm16}, {imm8,Imm8}}) ->
    Immediates = le16(Imm16, [Imm8]),
    [16#C8 | Immediates].
494
%% Size in bytes of the enter encoding: opcode (1) + imm16 (2) + imm8 (1).
enter_sizeof({{imm16,_}, {imm8,_}}) ->
    1 + 2 + 1.
497
%% Encode imul in its one-, two- and three-operand forms:
%%   {rm}              F7 /5         <edx,eax> *= rm
%%   {reg, rm}         0F AF         reg *= rm
%%   {reg, rm, imm8}   6B            reg := rm * sext(imm8)
%%   {reg, rm, imm32}  69            reg := rm * imm32
%% Each 64-bit form is its 32-bit counterpart preceded by a REX.W marker.
imul_encode(Opnds) ->
    case Opnds of
        %% 64-bit forms
        {{rm64,RM}} ->
            [rex([{w,1}]) | imul_encode({{rm32,RM}})];
        {{reg64,Reg}, {rm64,RM}} ->
            [rex([{w,1}]) | imul_encode({{reg32,Reg}, {rm32,RM}})];
        {{reg64,Reg}, {rm64,RM}, {imm8,_} = Imm} ->
            [rex([{w,1}]) | imul_encode({{reg32,Reg}, {rm32,RM}, Imm})];
        {{reg64,Reg}, {rm64,RM}, {imm32,_} = Imm} ->
            [rex([{w,1}]) | imul_encode({{reg32,Reg}, {rm32,RM}, Imm})];
        %% 32-bit forms
        {{rm32,RM}} ->
            [16#F7 | encode_rm(RM, 2#101, [])];
        {{reg32,Reg}, {rm32,RM}} ->
            [16#0F, 16#AF | encode_rm(RM, Reg, [])];
        {{reg32,Reg}, {rm32,RM}, {imm8,Imm}} ->
            [16#6B | encode_rm(RM, Reg, [Imm])];
        {{reg32,Reg}, {rm32,RM}, {imm32,Imm}} ->
            ImmBytes = le32(Imm, []),
            [16#69 | encode_rm(RM, Reg, ImmBytes)]
    end.
517
518%% imul_sizeof(Opnds) ->
519%%    case Opnds of
520%%	{{rm32,RM32}} ->
521%%	    1 + sizeof_rm(RM32);
522%%	{{reg32,_}, {rm32,RM32}} ->
523%%	    2 + sizeof_rm(RM32);
524%%	{{reg32,_}, {rm32,RM32}, {imm8,_}} ->
525%%	    1 + sizeof_rm(RM32) + 1;
526%%	{{reg32,_}, {rm32,RM32}, {imm32,_}} ->
527%%	    1 + sizeof_rm(RM32) + 4
528%%    end.
529
%% Encode a conditional jump: 70+CC rel8 for the short form, or
%% 0F 80+CC rel32 for the near form. CC is a 4-bit condition-code
%% number as produced by cc/1.
jcc_encode(Opnds) ->
    case Opnds of
        {{cc,CC}, {rel32,Rel}} ->
            Opcode = 16#80 bor CC,
            [16#0F, Opcode | le32(Rel, [])];
        {{cc,CC}, {rel8,Rel}} ->
            Opcode = 16#70 bor CC,
            [Opcode, Rel]
    end.
537
%% Size in bytes of a conditional jump as encoded by jcc_encode/1:
%% opcode + rel8 for the short form, two opcode bytes + rel32 for the
%% near form.
jcc_sizeof(Opnds) ->
    case Opnds of
        {{cc,_}, {rel32,_}} -> 2 + 4;
        {{cc,_}, {rel8,_}} -> 1 + 1
    end.
545
%% Encode a short-displacement-only jump (jecxz, loop, loope, loopne):
%% the opcode byte followed by the 8-bit displacement.
jmp8_op_encode(Opcode, {{rel8,Disp}}) ->
    [Opcode | [Disp]].
548
%% Size in bytes of the encoding produced by jmp8_op_encode/2:
%% one opcode byte plus one displacement byte.
jmp8_op_sizeof({{rel8,_}}) ->			% jecxz, loop, loope, loopne
    2.
551
%% Encode an unconditional jump: EB rel8, E9 rel32, or FF /4 for an
%% indirect target given as an r/m operand. There is no rm32 form.
jmp_encode(Opnds) ->
    case Opnds of
        {{rm64,Target}} ->
            [16#FF | encode_rm(Target, 2#100, [])];
        {{rel32,Rel}} ->
            [16#E9 | le32(Rel, [])];
        {{rel8,Rel}} ->
            [16#EB, Rel]
    end.
563
564%% jmp_sizeof(Opnds) ->
565%%    case Opnds of
566%%	{{rel8,_}} ->
567%%	    2;
568%%	{{rel32,_}} ->
569%%	    1 + 4;
570%%	{{rm32,RM32}} ->
571%%	    1 + sizeof_rm(RM32)
572%%    end.
573
%% Encode lea: opcode 8D followed by the effective-address encoding
%% with the destination register in the ModRM reg field. The 64-bit
%% form is the 32-bit encoding preceded by a REX.W marker.
lea_encode({{reg32,Dst}, {ea,EffAddr}}) ->
    Operand = enc_ea(EffAddr, Dst, []),
    [16#8D | Operand];
lea_encode({{reg64,Dst}, {ea,_} = EA}) ->
    [rex([{w, 1}]) | lea_encode({{reg32,Dst}, EA})].
578
579%% lea_sizeof({{reg32,_}, {ea,EA}}) ->
580%%    1 + sizeof_ea(EA).
581
%% Encode mov in all operand combinations handled by this module.
%% Conventions used below:
%%   - 8-bit forms carry an {r8, ...} REX request so that fix_rex/3 can
%%     force a REX prefix for registers 4..7;
%%   - where a 16-bit form equals ?PFX_OPND_16BITS followed by the
%%     32-bit encoding, it is written that way;
%%   - where a 64-bit form equals a REX.W marker followed by the 32-bit
%%     encoding, it is written that way.
mov_encode(Opnds) ->
    case Opnds of
        %% --- r/m <- reg (88 / 89) ---
        {{rm8,RM}, {reg8,Reg}} ->
            Rex = rex([{r8, RM}, {r8, Reg}]),
            [Rex, 16#88 | encode_rm(RM, Reg, [])];
        {{rm16,RM}, {reg16,Reg}} ->
            [?PFX_OPND_16BITS | mov_encode({{rm32,RM}, {reg32,Reg}})];
        {{rm32,RM}, {reg32,Reg}} ->
            [16#89 | encode_rm(RM, Reg, [])];
        {{rm64,RM}, {reg64,Reg}} ->
            [rex([{w, 1}]) | mov_encode({{rm32,RM}, {reg32,Reg}})];

        %% --- reg <- r/m (8A / 8B) ---
        {{reg8,Reg}, {rm8,RM}} ->
            Rex = rex([{r8, RM}, {r8, Reg}]),
            [Rex, 16#8A | encode_rm(RM, Reg, [])];
        {{reg16,Reg}, {rm16,RM}} ->
            [?PFX_OPND_16BITS | mov_encode({{reg32,Reg}, {rm32,RM}})];
        {{reg32,Reg}, {rm32,RM}} ->
            [16#8B | encode_rm(RM, Reg, [])];
        {{reg64,Reg}, {rm64,RM}} ->
            [rex([{w, 1}]) | mov_encode({{reg32,Reg}, {rm32,RM}})];

        %% --- accumulator <- moffs (A0 / A1), offset emitted via le32 ---
        {al, {moffs8,Moffs}} ->
            [16#A0 | le32(Moffs, [])];
        {ax, {moffs16,Moffs}} ->
            [?PFX_OPND_16BITS | mov_encode({eax, {moffs32,Moffs}})];
        {eax, {moffs32,Moffs}} ->
            [16#A1 | le32(Moffs, [])];
        {rax, {moffs32,_} = Src} ->
            [rex([{w, 1}]) | mov_encode({eax, Src})];

        %% --- moffs <- accumulator (A2 / A3), offset emitted via le32 ---
        {{moffs8,Moffs}, al} ->
            [16#A2 | le32(Moffs, [])];
        {{moffs16,Moffs}, ax} ->
            [?PFX_OPND_16BITS | mov_encode({{moffs32,Moffs}, eax})];
        {{moffs32,Moffs}, eax} ->
            [16#A3 | le32(Moffs, [])];
        {{moffs32,_} = Dst, rax} ->
            [rex([{w, 1}]) | mov_encode({Dst, eax})];

        %% --- reg <- immediate (B0+r / B8+r) ---
        {{reg8,Reg}, {imm8,Imm}} ->
            Rex = rex([{b, Reg}, {r8, Reg}]),
            [Rex, 16#B0 bor (Reg band 2#111), Imm];
        {{reg16,Reg}, {imm16,Imm}} ->
            Rex = rex([{b, Reg}]),
            [?PFX_OPND_16BITS, Rex, 16#B8 bor (Reg band 2#111)
             | le16(Imm, [])];
        {{reg32,Reg}, {imm32,Imm}} ->
            Rex = rex([{b, Reg}]),
            [Rex, 16#B8 bor (Reg band 2#111) | le32(Imm, [])];
        {{reg64,Reg}, {imm64,Imm}} ->
            Rex = rex([{w, 1}, {b, Reg}]),
            [Rex, 16#B8 bor (Reg band 2#111) | le64(Imm, [])];

        %% --- r/m <- immediate (C6 /0, C7 /0) ---
        {{rm8,RM}, {imm8,Imm}} ->
            Rex = rex([{r8, RM}]),
            [Rex, 16#C6 | encode_rm(RM, 2#000, [Imm])];
        {{rm16,RM}, {imm16,Imm}} ->
            ImmBytes = le16(Imm, []),
            [?PFX_OPND_16BITS, 16#C7 | encode_rm(RM, 2#000, ImmBytes)];
        {{rm32,RM}, {imm32,Imm}} ->
            ImmBytes = le32(Imm, []),
            [16#C7 | encode_rm(RM, 2#000, ImmBytes)];
        {{rm64,RM}, {imm32,_} = Src} ->
            [rex([{w, 1}]) | mov_encode({{rm32,RM}, Src})]
    end.
638
639%% mov_sizeof(Opnds) ->
640%%     case Opnds of
641%% 	{{rm8,RM8}, {reg8,_}} ->
642%% 	    1 + sizeof_rm(RM8);
643%% 	{{rm16,RM16}, {reg16,_}} ->
644%% 	    2 + sizeof_rm(RM16);
645%% 	{{rm32,RM32}, {reg32,_}} ->
646%% 	    1 + sizeof_rm(RM32);
647%% 	{{reg8,_}, {rm8,RM8}} ->
648%% 	    1 + sizeof_rm(RM8);
649%% 	{{reg16,_}, {rm16,RM16}} ->
650%% 	    2 + sizeof_rm(RM16);
651%% 	{{reg32,_}, {rm32,RM32}} ->
652%% 	    1 + sizeof_rm(RM32);
653%% 	{al, {moffs8,_}} ->
654%% 	    1 + 4;
655%% 	{ax, {moffs16,_}} ->
656%% 	    2 + 4;
657%% 	{eax, {moffs32,_}} ->
658%% 	    1 + 4;
659%% 	{{moffs8,_}, al} ->
660%% 	    1 + 4;
661%% 	{{moffs16,_}, ax} ->
662%% 	    2 + 4;
663%% 	{{moffs32,_}, eax} ->
664%% 	    1 + 4;
665%% 	{{reg8,_}, {imm8,_}} ->
666%% 	    2;
667%% 	{{reg16,_}, {imm16,_}} ->
668%% 	    2 + 2;
669%% 	{{reg32,_}, {imm32,_}} ->
670%% 	    1 + 4;
671%% 	{{rm8,RM8}, {imm8,_}} ->
672%% 	    1 + sizeof_rm(RM8) + 1;
673%% 	{{rm16,RM16}, {imm16,_}} ->
674%% 	    2 + sizeof_rm(RM16) + 2;
675%% 	{{rm32,RM32}, {imm32,_}} ->
676%% 	    1 + sizeof_rm(RM32) + 4
677%%     end.
678
%% Encode a widening move (movsx, movzx).
%% For 8-bit sources the second opcode byte is Opcode; for 16-bit
%% sources it is Opcode bor 1. A 32-bit source widened to 64 bits uses
%% opcode 16#63 instead, with REX.W taken from bit 3 of Opcode.
%% NOTE(review): presumably bit 3 of Opcode is what tells movsx from
%% movzx here -- confirm against the opcodes passed by the caller.
movx_op_encode(Opcode, Opnds) ->
    case Opnds of
        %% 8-bit source
        {{reg16,Dst}, {rm8,Src}} ->
            Rex = rex([{r8, Src}]),
            [?PFX_OPND_16BITS, Rex, 16#0F, Opcode | encode_rm(Src, Dst, [])];
        {{reg32,Dst}, {rm8,Src}} ->
            Rex = rex([{r8, Src}]),
            [Rex, 16#0F, Opcode | encode_rm(Src, Dst, [])];
        {{reg64,Dst}, {rm8,Src}} ->
            RexW = rex([{w,1}]),
            [RexW, 16#0F, Opcode | encode_rm(Src, Dst, [])];
        %% 16-bit source
        {{reg32,Dst}, {rm16,Src}} ->
            [16#0F, Opcode bor 1 | encode_rm(Src, Dst, [])];
        {{reg64,Dst}, {rm16,Src}} ->
            RexW = rex([{w,1}]),
            [RexW, 16#0F, Opcode bor 1 | encode_rm(Src, Dst, [])];
        %% 32-bit source
        {{reg64,Dst}, {rm32,Src}} ->
            %% This is magic... /Luna
            WBit = 1 band (Opcode bsr 3),
            Rex = rex([{w, WBit}]),
            [Rex, 16#63 | encode_rm(Src, Dst, [])]
    end.
697
698%% movx_op_sizeof(Opnds) ->
699%%    case Opnds of
700%%	{{reg16,_}, {rm8,RM8}} ->
701%%	    3 + sizeof_rm(RM8);
702%%	{{reg32,_}, {rm8,RM8}} ->
703%%	    1 + 2 + sizeof_rm(RM8);
704%%	{{reg32,_}, {rm16,RM16}} ->
705%%	    1 + 2 + sizeof_rm(RM16)
706%%    end.
707
%% Encode pop: 58+r for a register (with a REX.B request for the
%% register), or 8F /0 for an r/m operand.
pop_encode(Opnds) ->
    case Opnds of
        {{reg64,Reg}} ->
            Rex = rex([{b, Reg}]),
            [Rex, 16#58 bor (Reg band 2#111)];
        {{rm64,RM}} ->
            [16#8F | encode_rm(RM, 2#000, [])]
    end.
715
716%% pop_sizeof(Opnds) ->
717%%    case Opnds of
718%%	{{rm32,RM32}} ->
719%%	    1 + sizeof_rm(RM32);
720%%	{{reg32,_}} ->
721%%	    1
722%%    end.
723
%% Encode push: 50+r for a register (with a REX.B request for the
%% register), FF /6 for an r/m operand, 6A for a sign-extended 8-bit
%% immediate, or 68 for a 32-bit immediate (sign extended to 64 bits).
%% There are no rm32 or reg32 forms.
push_encode(Opnds) ->
    case Opnds of
        {{reg64,Reg}} ->
            Rex = rex([{b, Reg}]),
            [Rex, 16#50 bor (Reg band 2#111)];
        {{rm64,RM}} ->
            [16#FF | encode_rm(RM, 2#110, [])];
        {{imm8,Imm}} ->        % sign-extended
            [16#6A, Imm];
        {{imm32,Imm}} ->       % sign extended to 64 bits
            [16#68 | le32(Imm, [])]
    end.
739
740%% push_sizeof(Opnds) ->
741%%    case Opnds of
742%%	{{rm32,RM32}} ->
743%%	    1 + sizeof_rm(RM32);
744%%	{{reg32,_}} ->
745%%	    1;
746%%	{{imm8,_}} ->
747%%	    2;
748%%	{{imm32,_}} ->
749%%	    1 + 4
750%%    end.
751
%% Encode a shift or rotate (rol, ror, rcl, rcr, shl, shr, sar).
%% The count is the literal 1 (opcode D1), the atom cl (D3) or an 8-bit
%% immediate (C1); SubOpcode goes into the ModRM reg field.
%% Each 64-bit form is its 32-bit counterpart preceded by a REX.W marker.
shift_op_encode(SubOpcode, Opnds) ->
    case Opnds of
        %% 64-bit forms
        {{rm64,RM}, 1} ->
            [rex([{w,1}]) | shift_op_encode(SubOpcode, {{rm32,RM}, 1})];
        {{rm64,RM}, cl} ->
            [rex([{w,1}]) | shift_op_encode(SubOpcode, {{rm32,RM}, cl})];
        {{rm64,RM}, {imm8,_} = Count} ->
            [rex([{w,1}]) | shift_op_encode(SubOpcode, {{rm32,RM}, Count})];
        %% 32-bit forms
        {{rm32,RM}, 1} ->
            [16#D1 | encode_rm(RM, SubOpcode, [])];
        {{rm32,RM}, cl} ->
            [16#D3 | encode_rm(RM, SubOpcode, [])];
        {{rm32,RM}, {imm8,Count}} ->
            [16#C1 | encode_rm(RM, SubOpcode, [Count])]
    end.
767
768%% shift_op_sizeof(Opnds) ->		% rcl, rcr, rol, ror, sar, shl, shr
769%%     case Opnds of
770%% 	{{rm32,RM32}, 1} ->
771%% 	    1 + sizeof_rm(RM32);
772%% 	{{rm32,RM32}, cl} ->
773%% 	    1 + sizeof_rm(RM32);
774%% 	{{rm32,RM32}, {imm8,_Imm8}} ->
775%% 	    1 + sizeof_rm(RM32) + 1
776%%     end.
777
%% Encode ret: C3 with no operands, or C2 followed by a little-endian
%% 16-bit immediate.
ret_encode(Opnds) ->
    case Opnds of
        {{imm16,Imm}} ->
            [16#C2 | le16(Imm, [])];
        {} ->
            [16#C3]
    end.
785
%% Size in bytes of ret as encoded by ret_encode/1: the opcode alone,
%% or the opcode plus a 16-bit immediate.
ret_sizeof(Opnds) ->
    case Opnds of
        {{imm16,_}} -> 1 + 2;
        {} -> 1
    end.
793
%% Encode setcc on an 8-bit r/m operand: 0F 90+CC with reg field 000.
%% The {r8, ...} REX request lets fix_rex/3 force a REX prefix for
%% 8-bit registers 4..7.
setcc_encode({{cc,CC}, {rm8,Dst}}) ->
    Rex = rex([{r8, Dst}]),
    Opcode = 16#90 bor CC,
    [Rex, 16#0F, Opcode | encode_rm(Dst, 2#000, [])].
796
797%% setcc_sizeof({{cc,_}, {rm8,RM8}}) ->
798%%    2 + sizeof_rm(RM8).
799
%% Encode a double-precision shift selected by Opcode. With an 8-bit
%% immediate count the second opcode byte is Opcode itself; with the
%% count in cl it is Opcode bor 1.
shd_op_encode(Opcode, Opnds) ->
    case Opnds of
        {{rm32,Dst}, {reg32,Src}, cl} ->
            [16#0F, Opcode bor 1 | encode_rm(Dst, Src, [])];
        {{rm32,Dst}, {reg32,Src}, {imm8,Count}} ->
            [16#0F, Opcode | encode_rm(Dst, Src, [Count])]
    end.
807
808%% shd_op_sizeof(Opnds) ->
809%%    case Opnds of
810%%	{{rm32,RM32}, {reg32,_}, {imm8,_}} ->
811%%	    2 + sizeof_rm(RM32) + 1;
812%%	{{rm32,RM32}, {reg32,_}, cl} ->
813%%	    2 + sizeof_rm(RM32)
814%%    end.
815
%% Encode test:
%%   accumulator, immediate   A8 (al) / A9 (ax, eax, rax)
%%   r/m, immediate           F6 /0 (8-bit) / F7 /0
%%   r/m, reg                 85
%% 16-bit forms carry ?PFX_OPND_16BITS; each 64-bit form is its 32-bit
%% counterpart preceded by a REX.W marker.
test_encode(Opnds) ->
    case Opnds of
        %% 64-bit forms
        {rax, {imm32,_} = Imm} ->
            [rex([{w,1}]) | test_encode({eax, Imm})];
        {{rm64,RM}, {imm32,_} = Imm} ->
            [rex([{w,1}]) | test_encode({{rm32,RM}, Imm})];
        {{rm64,RM}, {reg64,Reg}} ->
            [rex([{w,1}]) | test_encode({{rm32,RM}, {reg32,Reg}})];
        %% accumulator with immediate
        {al, {imm8,Imm}} ->
            [16#A8, Imm];
        {ax, {imm16,Imm}} ->
            [?PFX_OPND_16BITS, 16#A9 | le16(Imm, [])];
        {eax, {imm32,Imm}} ->
            [16#A9 | le32(Imm, [])];
        %% r/m with immediate
        {{rm8,RM}, {imm8,Imm}} ->
            Rex = rex([{r8,RM}]),
            [Rex, 16#F6 | encode_rm(RM, 2#000, [Imm])];
        {{rm16,RM}, {imm16,Imm}} ->
            ImmBytes = le16(Imm, []),
            [?PFX_OPND_16BITS, 16#F7 | encode_rm(RM, 2#000, ImmBytes)];
        {{rm32,RM}, {imm32,Imm}} ->
            ImmBytes = le32(Imm, []),
            [16#F7 | encode_rm(RM, 2#000, ImmBytes)];
        %% r/m with register
        {{rm32,RM}, {reg32,Reg}} ->
            [16#85 | encode_rm(RM, Reg, [])]
    end.
839
840%% test_sizeof(Opnds) ->
841%%     case Opnds of
842%% 	{eax, {imm32,_}} ->
843%% 	    1 + 4;
844%% 	{{rm32,RM32}, {imm32,_}} ->
845%% 	    1 + sizeof_rm(RM32) + 4;
846%% 	{{rm32,RM32}, {reg32,_}} ->
847%% 	    1 + sizeof_rm(RM32)
848%%     end.
849
%% Encode fild on a memory operand: opcode DB with reg field 000.
%% The operand cannot be a register! A malformed operand tuple fails
%% with badmatch.
%% NOTE(review): DB /0 looks like the 32-bit integer load although the
%% operand is tagged rm64 -- confirm the intended operand width.
fild_encode(Opnds) ->
    {{rm64, Mem}} = Opnds,
    Operand = encode_rm(Mem, 2#000, []),
    [16#DB | Operand].
854
855%% fild_sizeof(Opnds) ->
856%%    {{rm32, RM32}} = Opnds,
857%%    1 + sizeof_rm(RM32).
858
%% Encode fld: DD /0 for a 64-bit floating-point memory operand, or
%% D9 C0+i for x87 stack position i.
fld_encode(Opnds) ->
    case Opnds of
        {{fpst, Pos}} ->
            [16#D9, 16#C0 bor st(Pos)];
        {{rm64fp, Mem}} ->
            [16#DD | encode_rm(Mem, 2#000, [])]
    end.
866
867%% fld_sizeof(Opnds) ->
868%%    case Opnds of
869%%	{{rm64fp, RM64fp}} ->
870%%	    1 + sizeof_rm(RM64fp);
871%%	{{fpst, _}} ->
872%%	    2
873%%    end.
874
%% Encode a commutative x87 arithmetic instruction (fadd, fmul).
%% Memory operand: DC with OpCode in the ModRM reg field.
%% Stack operands: D8 when st(0) is the first operand, DC when it is the
%% second; the second byte is C0 + (OpCode bsl 3) + the other position.
%% The st(0),st(i) clause is tried first, so {st(0), st(0)} encodes with D8.
x87_comm_arith_encode(OpCode, Opnds) ->
    case Opnds of
        {{rm64fp, Mem}} ->
            [16#DC | encode_rm(Mem, OpCode, [])];
        {{fpst,0}, {fpst,Pos}} ->
            Base = 16#C0 bor (OpCode bsl 3),
            [16#D8, Base bor st(Pos)];
        {{fpst,Pos}, {fpst,0}} ->
            Base = 16#C0 bor (OpCode bsl 3),
            [16#DC, Base bor st(Pos)]
    end.
885
%% Encode a commutative x87 arithmetic-and-pop instruction (faddp,
%% fmulp): DE followed by C0 + (OpCode bsl 3) + stack position.
%% An empty operand list [] selects position 1.
x87_comm_arith_pop_encode(OpCode, Opnds) ->
    Pos = case Opnds of
              [] -> 1;
              {{fpst,P},{fpst,0}} -> P
          end,
    [16#DE, 16#C0 bor (OpCode bsl 3) bor st(Pos)].
894
%% Encode a non-commutative x87 arithmetic instruction (fdiv, fsub).
%% Memory operand: DC with OpCode in the ModRM reg field.
%% Stack operands: with st(0) first the low bit of OpCode is cleared and
%% the first byte is D8; with st(0) second the low bit is set and the
%% first byte is DC.
%% The st(0),st(i) clause is tried first, so {st(0), st(0)} encodes with D8.
x87_arith_encode(OpCode, Opnds) ->
    case Opnds of
        {{rm64fp, Mem}} ->
            [16#DC | encode_rm(Mem, OpCode, [])];
        {{fpst,0}, {fpst,Pos}} ->
            Sel = OpCode band 2#110,
            [16#D8, 16#C0 bor (Sel bsl 3) bor st(Pos)];
        {{fpst,Pos}, {fpst,0}} ->
            Sel = OpCode bor 1,
            [16#DC, 16#C0 bor (Sel bsl 3) bor st(Pos)]
    end.
907
%% Encode a non-commutative x87 arithmetic-and-pop instruction (fdivp,
%% fsubp): DE followed by C8 + ((OpCode bor 1) bsl 3) + stack position.
%% An empty operand list [] selects position 1.
x87_arith_pop_encode(OpCode, Opnds) ->
    Sel = OpCode bor 1,
    Pos = case Opnds of
              [] -> 1;
              {{fpst,P}, {fpst,0}} -> P
          end,
    [16#DE, 16#C8 bor (Sel bsl 3) bor st(Pos)].
917
%% Encode a reversed non-commutative x87 arithmetic instruction (fdivr,
%% fsubr).
%% Memory operand: DC with OpCode in the ModRM reg field.
%% Stack operands: with st(0) first the low bit of OpCode is set and the
%% first byte is D8; with st(0) second the low bit is cleared and the
%% first byte is DC.
%% The st(0),st(i) clause is tried first, so {st(0), st(0)} encodes with D8.
x87_arith_rev_encode(OpCode, Opnds) ->
    case Opnds of
        {{rm64fp, Mem}} ->
            [16#DC | encode_rm(Mem, OpCode, [])];
        {{fpst,0}, {fpst,Pos}} ->
            Sel = OpCode bor 1,
            [16#D8, 16#C0 bor (Sel bsl 3) bor st(Pos)];
        {{fpst,Pos}, {fpst,0}} ->
            Sel = OpCode band 2#110,
            [16#DC, 16#C0 bor (Sel bsl 3) bor st(Pos)]
    end.
930
%% Encode a reversed non-commutative x87 arithmetic-and-pop instruction
%% (fdivrp, fsubrp): DE followed by
%% C0 + ((OpCode band 2#110) bsl 3) + stack position.
%% An empty operand list [] selects position 1.
x87_arith_rev_pop_encode(OpCode, Opnds) ->
    Sel = OpCode band 2#110,
    Pos = case Opnds of
              [] -> 1;
              {{fpst,P}, {fpst,0}} -> P
          end,
    [16#DE, 16#C0 bor (Sel bsl 3) bor st(Pos)].
940
941%% x87_arith_sizeof(Opnds) ->
942%%    case Opnds of
943%%	{{rm64fp, RM64fp}} ->
944%%	    1 + sizeof_rm(RM64fp);
945%%	{{fpst,0}, {fpst,_}} ->
946%%	    2;
947%%	{{fpst,_}, {fpst,0}} ->
948%%	    2
949%%    end.
950
%% Encode an x87 store selected by OpCode: DD with OpCode in the ModRM
%% reg field for a 64-bit floating-point memory operand, or
%% DD followed by C0 + (OpCode bsl 3) + i for stack position i.
fst_encode(OpCode, Opnds) ->
    case Opnds of
        {{fpst, Pos}} ->
            Base = 16#C0 bor (OpCode bsl 3),
            [16#DD, Base bor st(Pos)];
        {{rm64fp, Mem}} ->
            [16#DD | encode_rm(Mem, OpCode, [])]
    end.
958
959%% fst_sizeof(Opnds) ->
960%%     case Opnds of
961%% 	{{rm64fp, RM64fp}} ->
962%% 	    1 + sizeof_rm(RM64fp);
963%% 	{{fpst, _}} ->
964%% 	    2
965%%     end.
966
%% Encode fchs: the fixed two-byte sequence D9 E0.
fchs_encode() ->
    [16#D9, 16#E0].
969
%% Size in bytes of the fchs encoding produced by fchs_encode/0.
fchs_sizeof() ->
    2.
972
%% Encode ffree for x87 stack position i: DD C0+i.
ffree_encode({{fpst, Pos}}) ->
    SecondByte = 16#C0 bor st(Pos),
    [16#DD, SecondByte].
975
%% Size in bytes of the two-byte ffree encoding produced by
%% ffree_encode/1.
ffree_sizeof() ->
    2.
978
%% Encode fwait: the single byte 9B.
fwait_encode() ->
    [16#9B].
981
%% Size in bytes of the fwait encoding produced by fwait_encode/0.
fwait_sizeof() ->
    1.
984
%% Encode fxch: D9 C8+i for x87 stack position i.
%% An empty operand list [] selects position 1.
fxch_encode(Opnds) ->
    Pos = case Opnds of
              [] -> 1;
              {{fpst, P}} -> P
          end,
    [16#D9, 16#C8 bor st(Pos)].
992
%% Size in bytes of the two-byte fxch encoding produced by
%% fxch_encode/1.
fxch_sizeof() ->
    2.
995
%% Encode one instruction.
%% Op and Opnds select the encoder via insn_encode_internal/2; the REX
%% markers in its result are then folded into at most one prefix byte by
%% fix_rex/1. Returns {Bytes, Relocs}. When the encoding contains
%% relocatable data, fix_relocs/4 replaces each placeholder with zero
%% bytes and records a relocation at its position, counted from Offset.
insn_encode(Op, Opnds, Offset) ->
    Bytes = fix_rex(insn_encode_internal(Op, Opnds)),
    case has_relocs(Bytes) of
        true ->
            fix_relocs(Bytes, Offset, [], []);
        false ->        % the common case
            {Bytes, []}
    end.
1005
%% Fold all REX markers of an encoded instruction into one REX prefix.
%%
%% The input list may contain plain elements, bare {rex, Bits} markers
%% and {{rex, Bits}, Byte} pairs (see mk_modrm/3 and mk_sib/3). All Bits
%% are OR-ed into an accumulator that starts as 2#01000000, the fixed
%% high nibble of a REX byte; markers are dropped and pairs are reduced
%% to their Byte.
%%
%% If the accumulator is unchanged, no prefix is emitted. Otherwise the
%% REX byte (accumulator masked to 8 bits, which discards the
%% "force a prefix" bit 8 set for 8-bit registers) is inserted in front
%% of the instruction. When the instruction starts with one of the
%% prefixes 16#66, 16#F2 or 16#F3, the REX byte goes after that prefix,
%% because REX has to be the last prefix before the opcode.
fix_rex(Bytes) ->
    fix_rex(Bytes, 2#0100 bsl 4, []).

fix_rex([{rex, REX} | Rest], REXAcc, Bytes) ->
    fix_rex(Rest, REXAcc bor REX, Bytes);
fix_rex([{{rex, REX}, Byte} | Rest], REXAcc, Bytes) ->
    fix_rex(Rest, REXAcc bor REX, [Byte | Bytes]);
fix_rex([Byte | Rest], REXAcc, Bytes) ->
    fix_rex(Rest, REXAcc, [Byte | Bytes]);
fix_rex([], 2#01000000, Bytes) ->              % no rex prefix
    lists:reverse(Bytes);
fix_rex([], REX0, Bytes) ->                    % rex prefix...
    REX = REX0 band 16#FF, % for 8 bit registers
    [Head|Tail] = lists:reverse(Bytes),
    case Head of
        Prefix when Prefix =:= 16#66;          % ...and 16 bit/sse2 prefix
                    Prefix =:= 16#F2;          % ...and sse2 prefix
                    Prefix =:= 16#F3 ->        % ...and rep/sse prefix
            [Prefix, REX | Tail];
        _ ->                                   % ...only
            [REX, Head | Tail]
    end.
1028
%% True when the encoded byte list contains at least one relocation
%% marker, i.e. a {le32,_,_} or {le64,_,_} tuple.
has_relocs(Bytes) ->
    lists:any(fun({le32, _, _}) -> true;
                 ({le64, _, _}) -> true;
                 (_) -> false
              end,
              Bytes).
1033
%% fix_relocs(Items, Offset, RevCode, RevRelocs)
%% Replaces every {le32,Tag,Val} / {le64,Tag,Val} marker by 4 / 8 zero
%% bytes and records {Tag, Offset, Val}, where Offset is the position of
%% the first zero byte. Ordinary items are copied through and advance the
%% offset by one. Returns {Code, Relocs}, both in original order.
fix_relocs([], _Offset, RevCode, RevRelocs) ->
    {lists:reverse(RevCode), lists:reverse(RevRelocs)};
fix_relocs([{Kind, Tag, Val} | Rest], Offset, RevCode, RevRelocs)
  when Kind =:= le32; Kind =:= le64 ->
    Width =
        case Kind of
            le32 -> 4;
            le64 -> 8
        end,
    fix_relocs(Rest, Offset + Width,
               lists:duplicate(Width, 16#00) ++ RevCode,
               [{Tag, Offset, Val} | RevRelocs]);
fix_relocs([Byte | Rest], Offset, RevCode, RevRelocs) ->
    fix_relocs(Rest, Offset + 1, [Byte | RevCode], RevRelocs).
1047
%% Dispatch an instruction mnemonic to its encoder.
%% Each clause forwards Opnds to the encoder for that instruction family,
%% together with the opcode or sub-opcode constant where the family needs
%% one. Unknown mnemonics exit with {?MODULE,insn_encode,Op}.
insn_encode_internal('adc', Opnds) -> arith_binop_encode(2#010, Opnds);
insn_encode_internal('add', Opnds) -> arith_binop_encode(2#000, Opnds);
insn_encode_internal('and', Opnds) -> arith_binop_encode(2#100, Opnds);
insn_encode_internal('bsf', Opnds) -> bs_op_encode(16#BC, Opnds);
insn_encode_internal('bsr', Opnds) -> bs_op_encode(16#BD, Opnds);
insn_encode_internal('bswap', Opnds) -> bswap_encode(Opnds);
insn_encode_internal('bt', Opnds) -> bt_op_encode(2#100, Opnds);
insn_encode_internal('btc', Opnds) -> bt_op_encode(2#111, Opnds);
insn_encode_internal('btr', Opnds) -> bt_op_encode(2#110, Opnds);
insn_encode_internal('bts', Opnds) -> bt_op_encode(2#101, Opnds);
insn_encode_internal('call', Opnds) -> call_encode(Opnds);
insn_encode_internal('cbw', Opnds) -> cbw_encode(Opnds);
insn_encode_internal('cdq', Opnds) -> nullary_op_encode(16#99, Opnds);
insn_encode_internal('clc', Opnds) -> nullary_op_encode(16#F8, Opnds);
insn_encode_internal('cld', Opnds) -> nullary_op_encode(16#FC, Opnds);
insn_encode_internal('cmc', Opnds) -> nullary_op_encode(16#F5, Opnds);
insn_encode_internal('cmovcc', Opnds) -> cmovcc_encode(Opnds);
insn_encode_internal('cmp', Opnds) -> arith_binop_encode(2#111, Opnds);
insn_encode_internal('cwde', Opnds) -> nullary_op_encode(16#98, Opnds);
insn_encode_internal('dec', Opnds) -> incdec_encode(2#001, Opnds);
insn_encode_internal('div', Opnds) -> arith_unop_encode(2#110, Opnds);
insn_encode_internal('enter', Opnds) -> enter_encode(Opnds);
insn_encode_internal('idiv', Opnds) -> arith_unop_encode(2#111, Opnds);
insn_encode_internal('imul', Opnds) -> imul_encode(Opnds);
insn_encode_internal('inc', Opnds) -> incdec_encode(2#000, Opnds);
insn_encode_internal('into', Opnds) ->
    %% only encodable when the target architecture is x86
    case get(hipe_target_arch) of
        x86 ->
            nullary_op_encode(16#CE, Opnds);
        amd64 ->
            exit({invalid_amd64_opcode, hipe_amd64_encode__erl})
    end;
insn_encode_internal('jcc', Opnds) -> jcc_encode(Opnds);
insn_encode_internal('jecxz', Opnds) -> jmp8_op_encode(16#E3, Opnds);
insn_encode_internal('jmp', Opnds) -> jmp_encode(Opnds);
insn_encode_internal('lea', Opnds) -> lea_encode(Opnds);
insn_encode_internal('leave', Opnds) -> nullary_op_encode(16#C9, Opnds);
insn_encode_internal('loop', Opnds) -> jmp8_op_encode(16#E2, Opnds);
insn_encode_internal('loope', Opnds) -> jmp8_op_encode(16#E1, Opnds);
insn_encode_internal('loopne', Opnds) -> jmp8_op_encode(16#E0, Opnds);
insn_encode_internal('mov', Opnds) -> mov_encode(Opnds);
insn_encode_internal('movsx', Opnds) -> movx_op_encode(16#BE, Opnds);
insn_encode_internal('movzx', Opnds) -> movx_op_encode(16#B6, Opnds);
insn_encode_internal('mul', Opnds) -> arith_unop_encode(2#100, Opnds);
insn_encode_internal('neg', Opnds) -> arith_unop_encode(2#011, Opnds);
insn_encode_internal('nop', Opnds) -> nullary_op_encode(16#90, Opnds);
insn_encode_internal('not', Opnds) -> arith_unop_encode(2#010, Opnds);
insn_encode_internal('or', Opnds) -> arith_binop_encode(2#001, Opnds);
insn_encode_internal('pop', Opnds) -> pop_encode(Opnds);
insn_encode_internal('prefix_fs', Opnds) -> nullary_op_encode(16#64, Opnds);
insn_encode_internal('push', Opnds) -> push_encode(Opnds);
insn_encode_internal('rcl', Opnds) -> shift_op_encode(2#010, Opnds);
insn_encode_internal('rcr', Opnds) -> shift_op_encode(2#011, Opnds);
insn_encode_internal('ret', Opnds) -> ret_encode(Opnds);
insn_encode_internal('rol', Opnds) -> shift_op_encode(2#000, Opnds);
insn_encode_internal('ror', Opnds) -> shift_op_encode(2#001, Opnds);
insn_encode_internal('sar', Opnds) -> shift_op_encode(2#111, Opnds);
insn_encode_internal('sbb', Opnds) -> arith_binop_encode(2#011, Opnds);
insn_encode_internal('setcc', Opnds) -> setcc_encode(Opnds);
insn_encode_internal('shl', Opnds) -> shift_op_encode(2#100, Opnds);
insn_encode_internal('shld', Opnds) -> shd_op_encode(16#A4, Opnds);
insn_encode_internal('shr', Opnds) -> shift_op_encode(2#101, Opnds);
insn_encode_internal('shrd', Opnds) -> shd_op_encode(16#AC, Opnds);
insn_encode_internal('stc', Opnds) -> nullary_op_encode(16#F9, Opnds);
insn_encode_internal('std', Opnds) -> nullary_op_encode(16#FD, Opnds);
insn_encode_internal('sub', Opnds) -> arith_binop_encode(2#101, Opnds);
insn_encode_internal('test', Opnds) -> test_encode(Opnds);
insn_encode_internal('xor', Opnds) -> arith_binop_encode(2#110, Opnds);

%% sse2
insn_encode_internal('addsd', Opnds) -> sse2_arith_binop_encode(16#F2, 16#58, Opnds);
insn_encode_internal('cmpsd', Opnds) -> sse2_arith_binop_encode(16#F2, 16#C2, Opnds);
insn_encode_internal('comisd', Opnds) -> sse2_arith_binop_encode(16#66, 16#2F, Opnds);
insn_encode_internal('cvtsi2sd', Opnds) -> sse2_cvtsi2sd_encode(Opnds);
insn_encode_internal('divsd', Opnds) -> sse2_arith_binop_encode(16#F2, 16#5E, Opnds);
insn_encode_internal('maxsd', Opnds) -> sse2_arith_binop_encode(16#F2, 16#5F, Opnds);
insn_encode_internal('minsd', Opnds) -> sse2_arith_binop_encode(16#F2, 16#5D, Opnds);
insn_encode_internal('movsd', Opnds) -> sse2_mov_encode(Opnds);
insn_encode_internal('mulsd', Opnds) -> sse2_arith_binop_encode(16#F2, 16#59, Opnds);
insn_encode_internal('sqrtsd', Opnds) -> sse2_arith_binop_encode(16#F2, 16#51, Opnds);
insn_encode_internal('subsd', Opnds) -> sse2_arith_binop_encode(16#F2, 16#5C, Opnds);
insn_encode_internal('ucomisd', Opnds) -> sse2_arith_binop_encode(16#66, 16#2E, Opnds);
insn_encode_internal('xorpd', Opnds) -> sse2_arith_binop_encode(16#66, 16#57, Opnds);
%% End of sse2

%% x87
insn_encode_internal('fadd', Opnds) -> x87_comm_arith_encode(2#000, Opnds);
insn_encode_internal('faddp', Opnds) -> x87_comm_arith_pop_encode(2#000, Opnds);
insn_encode_internal('fchs', _Opnds) -> fchs_encode();
insn_encode_internal('fdiv', Opnds) -> x87_arith_encode(2#110, Opnds);
insn_encode_internal('fdivp', Opnds) -> x87_arith_pop_encode(2#110, Opnds);
insn_encode_internal('fdivr', Opnds) -> x87_arith_rev_encode(2#111, Opnds);
insn_encode_internal('fdivrp', Opnds) -> x87_arith_rev_pop_encode(2#111, Opnds);
insn_encode_internal('ffree', Opnds) -> ffree_encode(Opnds);
insn_encode_internal('fild', Opnds) -> fild_encode(Opnds);
insn_encode_internal('fld', Opnds) -> fld_encode(Opnds);
insn_encode_internal('fmul', Opnds) -> x87_comm_arith_encode(2#001, Opnds);
insn_encode_internal('fmulp', Opnds) -> x87_comm_arith_pop_encode(2#001, Opnds);
insn_encode_internal('fst', Opnds) -> fst_encode(2#010, Opnds);
insn_encode_internal('fstp', Opnds) -> fst_encode(2#011, Opnds);
insn_encode_internal('fsub', Opnds) -> x87_arith_encode(2#100, Opnds);
insn_encode_internal('fsubp', Opnds) -> x87_arith_pop_encode(2#100, Opnds);
insn_encode_internal('fsubr', Opnds) -> x87_arith_rev_encode(2#101, Opnds);
insn_encode_internal('fsubrp', Opnds) -> x87_arith_rev_pop_encode(2#101, Opnds);
insn_encode_internal('fwait', _Opnds) -> fwait_encode();
insn_encode_internal('fxch', Opnds) -> fxch_encode(Opnds);
%% End of x87

insn_encode_internal(Op, _Opnds) ->
    exit({?MODULE,insn_encode,Op}).
1158
%% Return the encoded size, in bytes, of instruction Op with operands Opnds.
%% A handful of instructions have dedicated sizing helpers. Everything
%% else is sized by actually encoding the instruction at offset 0 and
%% counting the bytes.
%% Note: per-instruction *_sizeof dispatch for the remaining integer and
%% x87 instructions (arith_binop_sizeof, mov_sizeof, x87_arith_sizeof,
%% fst_sizeof, ...) is currently disabled in favour of that fallback.
insn_sizeof('cbw', Opnds) ->
    cbw_sizeof(Opnds);
insn_sizeof('enter', Opnds) ->
    enter_sizeof(Opnds);
insn_sizeof('jcc', Opnds) ->
    jcc_sizeof(Opnds);
insn_sizeof('ret', Opnds) ->
    ret_sizeof(Opnds);
insn_sizeof(Op, Opnds)
  when Op =:= 'jecxz'; Op =:= 'loop'; Op =:= 'loope'; Op =:= 'loopne' ->
    jmp8_op_sizeof(Opnds);
insn_sizeof(Op, Opnds)
  when Op =:= 'cdq'; Op =:= 'clc'; Op =:= 'cld'; Op =:= 'cmc';
       Op =:= 'cwde'; Op =:= 'into'; Op =:= 'leave'; Op =:= 'nop';
       Op =:= 'prefix_fs'; Op =:= 'stc'; Op =:= 'std' ->
    nullary_op_sizeof(Opnds);
%% x87 instructions with a fixed size
insn_sizeof('fchs', _Opnds) ->
    fchs_sizeof();
insn_sizeof('ffree', _Opnds) ->
    ffree_sizeof();
insn_sizeof('fwait', _Opnds) ->
    fwait_sizeof();
insn_sizeof('fxch', _Opnds) ->
    fxch_sizeof();
insn_sizeof(Op, Opnds) ->
    %% Hack that is to be removed some day... Maybe...
    {Bytes, _} = insn_encode(Op, Opnds, 0),
    length(Bytes).
1251
1252%%=====================================================================
1253%% testing interface
1254%%=====================================================================
1255
1256-ifdef(DO_HIPE_AMD64_ENCODE_TEST).
1257
%% Write Text to the output device and return the result of file:write/2.
say(Device, Text) ->
    file:write(Device, Text).
1260
%% Map the low four bits of Dig0 to the corresponding upper-case
%% hexadecimal character ($0..$9, $A..$F).
digit16(Dig0) ->
    case Dig0 band 16#F of
        Nibble when Nibble < 10 ->
            $0 + Nibble;
        Nibble ->
            $A + Nibble - 10
    end.
1266
%% Write Byte as "0x" followed by its two hexadecimal digits, high nibble
%% first, using three separate writes.
say_byte(OS, Byte) ->
    say(OS, "0x"),
    High = digit16(Byte bsr 4),
    say(OS, [High]),
    Low = digit16(Byte),
    say(OS, [Low]).
1271
%% Start the output with the "\t.text\n" directive line.
init(Device) ->
    say(Device, "\t.text\n").
1274
%% Write Byte0 followed by the bytes in Bytes0 as a comma-separated list
%% of hexadecimal values, ending the line after the last one.
say_bytes(OS, Byte0, Bytes0) ->
    say_byte(OS, Byte0),
    case Bytes0 of
        [Next | Remaining] ->
            say(OS, ","),
            say_bytes(OS, Next, Remaining);
        [] ->
            say(OS, "\n")
    end.
1284
%% Exercise one instruction: run insn_sizeof/2 on it (the result is not
%% used), encode it at offset 0 while requiring a non-empty byte list and
%% no relocations, and print the bytes as a "\t.byte ..." line.
t(OS, Op, Opnds) ->
    _Size = insn_sizeof(Op, Opnds),
    {[First | Rest], []} = insn_encode(Op, Opnds, 0),
    say(OS, "\t.byte "),
    say_bytes(OS, First, Rest).
1290
%% Write the whole test vector to OS: the ".text" header followed by one
%% ".byte" line per encoded instruction, in a fixed order. Returns [].
%% The local funs below only abbreviate the operand combinations that are
%% repeated for several instructions; every instruction is still emitted
%% through t/3, one at a time, in the listed order.
dotest1(OS) ->
    init(OS),
    Emit = fun(Op, Opnds) -> t(OS, Op, Opnds) end,
    Nullary = fun(Op) -> Emit(Op, {}) end,
    %% exercise all rm32 types
    LeaEax = fun(EA) -> Emit(lea, {{reg32,?EAX}, {ea,EA}}) end,
    LeaEax(ea_disp32_rip(16#87654321)),
    LeaEax(ea_sib(sib(?ECX))),
    LeaEax(ea_sib(sib(?ECX, sindex(2#10,?EDI)))),
    LeaEax(ea_disp32_sindex(16#87654321)),
    LeaEax(ea_disp32_sindex(16#87654321, sindex(2#10,?EDI))),
    LeaEax(ea_base(?ECX)),
    LeaEax(ea_disp8_sib(16#03, sib(?ECX))),
    LeaEax(ea_disp8_sib(16#03, sib(?ECX, sindex(2#10,?EDI)))),
    LeaEax(ea_disp8_base(16#3, ?ECX)),
    LeaEax(ea_disp32_sib(16#87654321, sib(?ECX))),
    LeaEax(ea_disp32_sib(16#87654321, sib(?ECX, sindex(2#10,?EDI)))),
    LeaEax(ea_disp32_base(16#87654321, ?EBP)),
    Emit(call, {{rm32,rm_reg(?EAX)}}),
    Emit(call, {{rm32,rm_mem(ea_disp32_sindex(16#87654321, sindex(2#10,?EDI)))}}),
    Emit(call, {{rel32,-5}}),
    %% default parameters for the tests below
    Word32 = 16#87654321,
    Word16 = 16#F00F,
    Word8 = 16#80,
    Imm32 = {imm32,Word32},
    Imm16 = {imm16,Word16},
    Imm8 = {imm8,Word8},
    RM64 = {rm64,rm_reg(?EDX)},
    RM32 = {rm32,rm_reg(?EDX)},
    RM16 = {rm16,rm_reg(?EDX)},
    RM16REX = {rm16,rm_reg(?R13)},
    RM8 = {rm8,rm_reg(?EDX)},
    RM8REX = {rm8,rm_reg(?SIL)},
    Rel32 = {rel32,Word32},
    Rel8 = {rel8,Word8},
    Moffs32 = {moffs32,Word32},
    Moffs16 = {moffs16,Word32},
    Moffs8 = {moffs8,Word32},
    CC = {cc,?CC_G},
    Reg64 = {reg64,?EAX},
    Reg32 = {reg32,?EAX},
    Reg16 = {reg16,?EAX},
    Reg8 = {reg8,?SPL},
    EA = {ea,ea_base(?ECX)},
    %% operand combinations shared by several instructions
    ArithBinop =
        fun(Op) ->
                Emit(Op, {eax,Imm32}),
                Emit(Op, {RM32,Imm32}),
                Emit(Op, {RM32,Imm8}),
                Emit(Op, {RM32,Reg32}),
                Emit(Op, {Reg32,RM32})
        end,
    BitTest =
        fun(Op) ->
                Emit(Op, {RM32,Reg32}),
                Emit(Op, {RM32,Imm8})
        end,
    Shift =
        fun(Op) ->
                Emit(Op, {RM32,1}),
                Emit(Op, {RM32,cl}),
                Emit(Op, {RM32,Imm8})
        end,
    DoubleShift =
        fun(Op) ->
                Emit(Op, {RM32,Reg32,Imm8}),
                Emit(Op, {RM32,Reg32,cl})
        end,
    MovExtend =
        fun(Op) ->
                Emit(Op, {Reg16,RM8}),
                Emit(Op, {Reg32,RM8}),
                Emit(Op, {Reg32,RM16})
        end,
    %% exercise each instruction definition
    ArithBinop('adc'),
    ArithBinop('add'),
    ArithBinop('and'),
    Emit('bsf', {Reg32,RM32}),
    Emit('bsr', {Reg32,RM32}),
    Emit('bswap', {Reg32}),
    BitTest('bt'),
    BitTest('btc'),
    BitTest('btr'),
    BitTest('bts'),
    Emit('call', {Rel32}),
    Emit('call', {RM32}),
    Nullary('cbw'),
    Nullary('cdq'),
    Nullary('clc'),
    Nullary('cld'),
    Nullary('cmc'),
    Emit('cmovcc', {CC,Reg32,RM32}),
    ArithBinop('cmp'),
    Nullary('cwde'),
    Emit('dec', {RM32}),
    Emit('dec', {Reg32}),
    Emit('div', {RM32}),
    Emit('enter', {Imm16,{imm8,3}}),
    Emit('idiv', {RM32}),
    Emit('imul', {RM32}),
    Emit('imul', {Reg32,RM32}),
    Emit('imul', {Reg32,RM32,Imm8}),
    Emit('imul', {Reg32,RM32,Imm32}),
    Emit('inc', {RM32}),
    Emit('inc', {Reg32}),
    Nullary('into'),
    Emit('jcc', {CC,Rel8}),
    Emit('jcc', {CC,Rel32}),
    Emit('jecxz', {Rel8}),
    Emit('jmp', {Rel8}),
    Emit('jmp', {Rel32}),
    Emit('jmp', {RM32}),
    Emit('lea', {Reg32,EA}),
    Nullary('leave'),
    Emit('loop', {Rel8}),
    Emit('loope', {Rel8}),
    Emit('loopne', {Rel8}),
    Emit('mov', {RM8,Reg8}),
    Emit('mov', {RM16,Reg16}),
    Emit('mov', {RM32,Reg32}),
    Emit('mov', {Reg8,RM8}),
    Emit('mov', {Reg16,RM16}),
    Emit('mov', {Reg32,RM32}),
    Emit('mov', {al,Moffs8}),
    Emit('mov', {ax,Moffs16}),
    Emit('mov', {eax,Moffs32}),
    Emit('mov', {Moffs8,al}),
    Emit('mov', {Moffs16,ax}),
    Emit('mov', {Moffs32,eax}),
    Emit('mov', {Reg8,Imm8}),
    Emit('mov', {Reg16,Imm16}),
    Emit('mov', {Reg32,Imm32}),
    Emit('mov', {RM8,Imm8}),
    Emit('mov', {RM16,Imm16}),
    Emit('mov', {RM32,Imm32}),
    MovExtend('movsx'),
    MovExtend('movzx'),
    Emit('mul', {RM32}),
    Emit('neg', {RM32}),
    Nullary('nop'),
    Emit('not', {RM32}),
    ArithBinop('or'),
    Emit('pop', {RM32}),
    Emit('pop', {Reg32}),
    Emit('push', {RM32}),
    Emit('push', {Reg32}),
    Emit('push', {Imm8}),
    Emit('push', {Imm32}),
    Shift('rcl'),
    Shift('rcr'),
    Nullary('ret'),
    Emit('ret', {Imm16}),
    Shift('rol'),
    Shift('ror'),
    Shift('sar'),
    ArithBinop('sbb'),
    Emit('setcc', {CC,RM8}),
    Shift('shl'),
    DoubleShift('shld'),
    Shift('shr'),
    DoubleShift('shrd'),
    Nullary('stc'),
    Nullary('std'),
    ArithBinop('sub'),
    Emit('test', {al,Imm8}),
    Emit('test', {ax,Imm16}),
    Emit('test', {eax,Imm32}),
    Emit('test', {rax,Imm32}),
    Emit('test', {RM8,Imm8}),
    Emit('test', {RM8REX,Imm8}),
    Emit('test', {RM16,Imm16}),
    Emit('test', {RM16REX,Imm16}),
    Emit('test', {RM32,Imm32}),
    Emit('test', {RM64,Imm32}),
    Emit('test', {RM32,Reg32}),
    Emit('test', {RM64,Reg64}),
    ArithBinop('xor'),
    %% fs prefix immediately followed by the instruction it applies to
    Nullary('prefix_fs'),
    Emit('add', {{reg32,?EAX},{rm32,rm_mem(ea_disp32_rip(16#20))}}),
    [].
1495
%% Run the test vector against the group leader (stdout == group_leader).
dotest() ->
    StdOut = group_leader(),
    dotest1(StdOut).
1497
%% Run the test vector and write the output to File.
%% File is opened for writing (crashing with badmatch if that fails) and
%% is always closed again, even when dotest1/1 raises part-way through.
%% Without the `after' the descriptor would stay open until the calling
%% process terminated. Returns ok once the vector has been written.
dotest(File) ->
    {ok,OS} = file:open(File, [write]),
    try
        dotest1(OS)
    after
        file:close(OS)
    end,
    ok.
1502-endif.
1503