1 {
2     Copyright (c) 1998-2002 by Florian Klaempfl
3 
4     This unit implements the code generator for the i8086
5 
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with this program; if not, write to the Free Software
18     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 
20  ****************************************************************************
21 }
22 unit cgcpu;
23 
24 {$i fpcdefs.inc}
25 
26   interface
27 
28     uses
29        globtype,
30        cgbase,cgobj,cg64f32,cgx86,
31        aasmbase,aasmtai,aasmdata,aasmcpu,
32        cpubase,parabase,cgutils,
33        symconst,symdef
34        ;
35 
36     type
37 
38       { tcg8086 }
39 
40       tcg8086 = class(tcgx86)
41         procedure init_register_allocators;override;
42         procedure do_register_allocation(list:TAsmList;headertai:tai);override;
43 
44         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
45         procedure a_call_name_far(list : TAsmList;const s : string; weak: boolean);
46         procedure a_call_name_static(list : TAsmList;const s : string);override;
47         procedure a_call_name_static_far(list : TAsmList;const s : string);
48         procedure a_call_reg(list : TAsmList;reg : tregister);override;
49         procedure a_call_reg_far(list : TAsmList;reg : tregister);
50 
51         procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
52         procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
53         procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
54         procedure a_op_ref_reg(list : TAsmList; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister); override;
55         procedure a_op_reg_ref(list : TAsmList; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference); override;
56 
57         procedure push_const(list:TAsmList;size:tcgsize;a:tcgint);
58 
59         { passing parameter using push instead of mov }
60         procedure a_load_reg_cgpara(list : TAsmList;size : tcgsize;r : tregister;const cgpara : tcgpara);override;
61         procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const cgpara : tcgpara);override;
62         procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const cgpara : tcgpara);override;
63         procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const cgpara : tcgpara);override;
64 
65         { move instructions }
66         procedure a_load_const_reg(list : TAsmList; tosize: tcgsize; a : tcgint;reg : tregister);override;
67         procedure a_load_const_ref(list : TAsmList; tosize: tcgsize; a : tcgint;const ref : treference);override;
68         procedure a_load_reg_ref(list : TAsmList;fromsize,tosize: tcgsize; reg : tregister;const ref : treference);override;
69         { use a_load_ref_reg_internal() instead }
70         //procedure a_load_ref_reg(list : TAsmList;fromsize,tosize: tcgsize;const ref : treference;reg : tregister);override;
71         procedure a_load_reg_reg(list : TAsmList;fromsize,tosize: tcgsize;reg1,reg2 : tregister);override;
72 
73         {  comparison operations }
74         procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
75           l : tasmlabel);override;
76         procedure a_cmp_const_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;const ref : treference;
77           l : tasmlabel);override;
78         procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
79         procedure a_cmp_ref_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;const ref: treference; reg : tregister; l : tasmlabel); override;
80         procedure a_cmp_reg_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg : tregister; const ref: treference; l : tasmlabel); override;
81 
82         procedure gen_cmp32_jmp1(list: TAsmList; cmp_op: topcmp; l_skip, l_target: TAsmLabel);
83         procedure gen_cmp32_jmp2(list: TAsmList; cmp_op: topcmp; l_skip, l_target: TAsmLabel);
84 
85         procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister);override;
86         procedure g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference);override;
87 
88         procedure g_stackpointer_alloc(list : TAsmList;localsize: longint);override;
89         procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
90         procedure g_copyvaluepara_openarray(list : TAsmList;const ref:treference;const lenloc:tlocation;elesize:tcgint;destreg:tregister);
91         procedure g_releasevaluepara_openarray(list : TAsmList;const l:tlocation);
92 
93         procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);override;
94 
95         procedure get_32bit_ops(op: TOpCG; out op1,op2: TAsmOp);
96 
97         procedure add_move_instruction(instr:Taicpu);override;
98      protected
99         procedure a_load_ref_reg_internal(list : TAsmList;fromsize,tosize: tcgsize;const ref : treference;reg : tregister;isdirect:boolean);override;
100      end;
101 
102       tcg64f8086 = class(tcg64f32)
103         procedure a_op64_ref_reg(list : TAsmList;op:TOpCG;size : tcgsize;const ref : treference;reg : tregister64);override;
104         procedure a_op64_reg_ref(list : TAsmList;op:TOpCG;size : tcgsize;reg : tregister64; const ref: treference);override;
105         procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
106         procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
107         procedure a_op64_const_ref(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;const ref : treference);override;
108       private
109         procedure get_64bit_ops(op:TOpCG;var op1,op2:TAsmOp);
110       end;
111 
112     procedure create_codegen;
113 
114   implementation
115 
116     uses
117        globals,verbose,systems,cutils,
118        paramgr,procinfo,fmodule,
119        rgcpu,rgx86,cpuinfo,
120        symtype,symsym,symcpu,
121        tgobj,
122        hlcgobj;
123 
124 { Range check must be disabled explicitly as the code uses
125   implicit typecast to aint troughout }
126 {$R-}
127 
use_pushnull128     function use_push(const cgpara:tcgpara):boolean;
129       begin
130         result:=(not paramanager.use_fixed_stack) and
131                 assigned(cgpara.location) and
132                 (cgpara.location^.loc=LOC_REFERENCE) and
133                 (cgpara.location^.reference.index=NR_STACK_POINTER_REG);
134       end;
135 
136 
137     procedure tcg8086.init_register_allocators;
138       begin
139         inherited init_register_allocators;
140         if cs_create_pic in current_settings.moduleswitches then
141           rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_AX,RS_DX,RS_CX,RS_SI,RS_DI],first_int_imreg,[RS_BP])
142         else
143           if (cs_useebp in current_settings.optimizerswitches) and assigned(current_procinfo) and (current_procinfo.framepointer<>NR_BP) then
144             rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_AX,RS_DX,RS_CX,RS_BX,RS_SI,RS_DI,RS_BP],first_int_imreg,[])
145           else
146             rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_AX,RS_DX,RS_CX,RS_BX,RS_SI,RS_DI],first_int_imreg,[RS_BP]);
147         rg[R_MMXREGISTER]:=trgcpu.create(R_MMXREGISTER,R_SUBNONE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7],first_mm_imreg,[]);
148         rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7],first_mm_imreg,[]);
149         rgfpu:=Trgx86fpu.create;
150       end;
151 
152     procedure tcg8086.do_register_allocation(list:TAsmList;headertai:tai);
153       begin
154         if (pi_needs_got in current_procinfo.flags) then
155           begin
156             if getsupreg(current_procinfo.got) < first_int_imreg then
157               include(rg[R_INTREGISTER].used_in_proc,getsupreg(current_procinfo.got));
158           end;
159         inherited do_register_allocation(list,headertai);
160       end;
161 
162 
163     procedure tcg8086.a_call_name(list: TAsmList; const s: string; weak: boolean);
164       begin
165         if current_settings.x86memorymodel in x86_far_code_models then
166           a_call_name_far(list,s,weak)
167         else
168           a_call_name_near(list,s,weak);
169       end;
170 
171 
172     procedure tcg8086.a_call_name_far(list: TAsmList; const s: string;
173       weak: boolean);
174       var
175         sym : tasmsymbol;
176       begin
177         if not(weak) then
178           sym:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION)
elsenull179         else
180           sym:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION);
181         list.concat(taicpu.op_sym(A_CALL,S_FAR,sym));
182       end;
183 
184 
185     procedure tcg8086.a_call_name_static(list: TAsmList; const s: string);
186       begin
187         if current_settings.x86memorymodel in x86_far_code_models then
188           a_call_name_static_far(list,s)
189         else
190           a_call_name_static_near(list,s);
191       end;
192 
193 
194     procedure tcg8086.a_call_name_static_far(list: TAsmList; const s: string);
195       var
196         sym : tasmsymbol;
197       begin
198         sym:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
list.concatnull199         list.concat(taicpu.op_sym(A_CALL,S_FAR,sym));
200       end;
201 
202 
203     procedure tcg8086.a_call_reg(list: TAsmList; reg: tregister);
204       begin
205         if current_settings.x86memorymodel in x86_far_code_models then
206           a_call_reg_far(list,reg)
207         else
208           a_call_reg_near(list,reg);
209       end;
210 
211 
212     procedure tcg8086.a_call_reg_far(list: TAsmList; reg: tregister);
213       var
214         href: treference;
215       begin
216         { unfortunately, x86 doesn't have a 'call far reg:reg' instruction, so }
217         { we have to use a temp }
218         tg.gettemp(list,4,2,tt_normal,href);
219         { HACK!!! at this point all registers are allocated, due to the fact that
220           in the pascal calling convention, all registers are caller saved. This
221           causes the register allocator to fail on the next move instruction, so we
222           temporarily deallocate 2 registers.
223           TODO: figure out a better way to do this. }
224         cg.ungetcpuregister(list,NR_BX);
225         cg.ungetcpuregister(list,NR_SI);
226         a_load_reg_ref(list,OS_32,OS_32,reg,href);
227         cg.getcpuregister(list,NR_BX);
228         cg.getcpuregister(list,NR_SI);
229         href.segment:=NR_NO;
230         list.concat(taicpu.op_ref(A_CALL,S_FAR,href));
231         tg.ungettemp(list,href);
232       end;
233 
234 
235     procedure tcg8086.a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize;
236       a: tcgint; reg: TRegister);
237       type
238         trox32method=(rm_unspecified,rm_unrolledleftloop,rm_unrolledrightloop,
239                       rm_loopleft,rm_loopright,rm_fast_386);
240       var
241         tmpreg: tregister;
242         op1, op2: TAsmOp;
243         ax_subreg: tregister;
244         hl_loop_start: tasmlabel;
245         ai: taicpu;
246         use_loop, use_186_fast_shift, use_8086_fast_shift,
247           use_386_fast_shift: Boolean;
248         rox32method: trox32method=rm_unspecified;
249         i: Integer;
250         rol_amount, ror_amount: TCGInt;
251       begin
252         optimize_op_const(size, op, a);
253         check_register_size(size,reg);
254 
255         if size in [OS_64, OS_S64] then
256           internalerror(2013030904);
257 
258         if size in [OS_32, OS_S32] then
259           begin
260             case op of
261               OP_NONE:
262                 begin
263                   { Opcode is optimized away }
264                 end;
265               OP_MOVE:
266                 begin
267                   { Optimized, replaced with a simple load }
268                   a_load_const_reg(list,size,a,reg);
269                 end;
270               OP_ADD, OP_SUB:
271                 begin
272                   get_32bit_ops(op, op1, op2);
273                   { Optimization when the low 16-bits of the constant are 0 }
274                   if aint(a and $FFFF) = 0 then
275                     begin
276                       { use a_op_const_reg to allow the use of inc/dec }
277                       a_op_const_reg(list,op,OS_16,aint(a shr 16),GetNextReg(reg));
278                     end
279                   else
280                     begin
281                       cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
282                       list.concat(taicpu.op_const_reg(op1,S_W,aint(a and $FFFF),reg));
283                       list.concat(taicpu.op_const_reg(op2,S_W,aint(a shr 16),GetNextReg(reg)));
284                       cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
285                     end;
286                 end;
287               OP_AND, OP_OR, OP_XOR:
288                 begin
289                   { low word operation }
290                   if aint(a and $FFFF) = aint(0) then
291                     begin
292                       case op of
293                         OP_AND:
294                           a_load_const_reg(list,OS_16,aint(0),reg);
295                         OP_OR,OP_XOR:
296                           {do nothing};
297                         else
298                           InternalError(2013100701);
299                       end;
300                     end
301                   else if aint(a and $FFFF) = aint($FFFF) then
302                     begin
303                       case op of
304                         OP_AND:
305                           {do nothing};
306                         OP_OR:
307                           a_load_const_reg(list,OS_16,aint($FFFF),reg);
308                         OP_XOR:
309                           list.concat(taicpu.op_reg(A_NOT,S_W,reg));
310                         else
311                           InternalError(2013100701);
312                       end;
313                     end
314                   else
315                     a_op_const_reg(list,op,OS_16,aint(a and $FFFF),reg);
316 
317                   { high word operation }
318                   if aint(a shr 16) = aint(0) then
319                     begin
320                       case op of
321                         OP_AND:
322                           a_load_const_reg(list,OS_16,aint(0),GetNextReg(reg));
323                         OP_OR,OP_XOR:
324                           {do nothing};
325                         else
326                           InternalError(2013100701);
327                       end;
328                     end
329                   else if aint(a shr 16) = aint($FFFF) then
330                     begin
331                       case op of
332                         OP_AND:
333                           {do nothing};
334                         OP_OR:
335                           a_load_const_reg(list,OS_16,aint($FFFF),GetNextReg(reg));
336                         OP_XOR:
337                           list.concat(taicpu.op_reg(A_NOT,S_W,GetNextReg(reg)));
338                         else
339                           InternalError(2013100701);
340                       end;
341                     end
342                   else
343                     a_op_const_reg(list,op,OS_16,aint(a shr 16),GetNextReg(reg));
344                 end;
345               OP_SHR,OP_SHL,OP_SAR:
346                 begin
347                   a:=a and 31;
348                   { for shl with const >= 16, we can just move the low register
349                     to the high reg, then zero the low register, then do the
350                     remaining part of the shift (by const-16) in 16 bit on the
351                     high register. the same thing applies to shr with low and high
352                     reversed. sar is exactly like shr, except that instead of
353                     zeroing the high register, we sar it by 15. }
354                   if a>=16 then
355                     case op of
356                       OP_SHR:
357                         begin
358                           a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg),reg);
359                           a_load_const_reg(list,OS_16,0,GetNextReg(reg));
360                           a_op_const_reg(list,OP_SHR,OS_16,a-16,reg);
361                         end;
362                       OP_SHL:
363                         begin
364                           a_load_reg_reg(list,OS_16,OS_16,reg,GetNextReg(reg));
365                           a_load_const_reg(list,OS_16,0,reg);
366                           a_op_const_reg(list,OP_SHL,OS_16,a-16,GetNextReg(reg));
367                         end;
368                       OP_SAR:
369                         begin
370                           if a=31 then
371                             begin
372                               a_op_const_reg(list,OP_SAR,OS_16,15,GetNextReg(reg));
373                               a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg),reg);
374                             end
375                           else
376                             begin
377                               a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg),reg);
378                               a_op_const_reg(list,OP_SAR,OS_16,15,GetNextReg(reg));
379                               a_op_const_reg(list,OP_SAR,OS_16,a-16,reg);
380                             end;
381                         end;
382                       else
383                         internalerror(2013060201);
384                     end
385                   else if a<>0 then
386                     begin
387                       use_loop:=a>2;
388                       use_386_fast_shift:=(current_settings.cputype>=cpu_386) and (a>1);
389                       use_186_fast_shift:=not use_386_fast_shift
390                         and (current_settings.cputype>=cpu_186) and (a>2)
391                         and not (cs_opt_size in current_settings.optimizerswitches);
392                       use_8086_fast_shift:=(current_settings.cputype<cpu_186) and (a>2)
393                         and not (cs_opt_size in current_settings.optimizerswitches);
394 
395                       if use_386_fast_shift then
396                         begin
397                           case op of
398                             OP_SHR:
399                               begin
400                                 list.concat(taicpu.op_const_reg_reg(A_SHRD,S_W,a,GetNextReg(reg),reg));
401                                 list.concat(taicpu.op_const_reg(A_SHR,S_W,a,GetNextReg(reg)));
402                               end;
403                             OP_SAR:
404                               begin
405                                 list.concat(taicpu.op_const_reg_reg(A_SHRD,S_W,a,GetNextReg(reg),reg));
406                                 list.concat(taicpu.op_const_reg(A_SAR,S_W,a,GetNextReg(reg)));
407                               end;
408                             OP_SHL:
409                               begin
410                                 list.concat(taicpu.op_const_reg_reg(A_SHLD,S_W,a,reg,GetNextReg(reg)));
411                                 list.concat(taicpu.op_const_reg(A_SHL,S_W,a,reg));
412                               end;
413                             else
414                               internalerror(2017040401);
415                           end;
416                         end
417                       else if use_186_fast_shift then
418                         begin
419                           tmpreg:=getintregister(list,OS_16);
420                           case op of
421                             OP_SHR:
422                               begin
423                                 a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg),tmpreg);
424                                 list.concat(taicpu.op_const_reg(A_SHR,S_W,a,GetNextReg(reg)));
425                                 list.concat(taicpu.op_const_reg(A_SHR,S_W,a,reg));
426                                 list.concat(taicpu.op_const_reg(A_SHL,S_W,16-a,tmpreg));
427                                 list.concat(taicpu.op_reg_reg(A_OR,S_W,tmpreg,reg));
428                               end;
429                             OP_SAR:
430                               begin
431                                 a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg),tmpreg);
432                                 list.concat(taicpu.op_const_reg(A_SAR,S_W,a,GetNextReg(reg)));
433                                 list.concat(taicpu.op_const_reg(A_SHR,S_W,a,reg));
434                                 list.concat(taicpu.op_const_reg(A_SHL,S_W,16-a,tmpreg));
435                                 list.concat(taicpu.op_reg_reg(A_OR,S_W,tmpreg,reg));
436                               end;
437                             OP_SHL:
438                               begin
439                                 a_load_reg_reg(list,OS_16,OS_16,reg,tmpreg);
440                                 list.concat(taicpu.op_const_reg(A_SHL,S_W,a,reg));
441                                 list.concat(taicpu.op_const_reg(A_SHL,S_W,a,GetNextReg(reg)));
442                                 list.concat(taicpu.op_const_reg(A_SHR,S_W,16-a,tmpreg));
443                                 list.concat(taicpu.op_reg_reg(A_OR,S_W,tmpreg,GetNextReg(reg)));
444                               end;
445                             else
446                               internalerror(2017040301);
447                           end;
448                         end
449                       else if use_8086_fast_shift then
450                         begin
451                           getcpuregister(list,NR_CX);
452                           a_load_const_reg(list,OS_8,a,NR_CL);
453 
454                           tmpreg:=getintregister(list,OS_16);
455                           case op of
456                             OP_SHR:
457                               begin
458                                 a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg),tmpreg);
459                                 list.concat(taicpu.op_reg_reg(A_SHR,S_W,NR_CL,GetNextReg(reg)));
460                                 list.concat(taicpu.op_reg_reg(A_SHR,S_W,NR_CL,reg));
461                                 if a<>8 then
462                                   a_load_const_reg(list,OS_8,16-a,NR_CL);
463                                 list.concat(taicpu.op_reg_reg(A_SHL,S_W,NR_CL,tmpreg));
464                                 list.concat(taicpu.op_reg_reg(A_OR,S_W,tmpreg,reg));
465                               end;
466                             OP_SAR:
467                               begin
468                                 a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg),tmpreg);
469                                 list.concat(taicpu.op_reg_reg(A_SAR,S_W,NR_CL,GetNextReg(reg)));
470                                 list.concat(taicpu.op_reg_reg(A_SHR,S_W,NR_CL,reg));
471                                 if a<>8 then
472                                   a_load_const_reg(list,OS_8,16-a,NR_CL);
473                                 list.concat(taicpu.op_reg_reg(A_SHL,S_W,NR_CL,tmpreg));
474                                 list.concat(taicpu.op_reg_reg(A_OR,S_W,tmpreg,reg));
475                               end;
476                             OP_SHL:
477                               begin
478                                 a_load_reg_reg(list,OS_16,OS_16,reg,tmpreg);
479                                 list.concat(taicpu.op_reg_reg(A_SHL,S_W,NR_CL,reg));
480                                 list.concat(taicpu.op_reg_reg(A_SHL,S_W,NR_CL,GetNextReg(reg)));
481                                 if a<>8 then
482                                   a_load_const_reg(list,OS_8,16-a,NR_CL);
483                                 list.concat(taicpu.op_reg_reg(A_SHR,S_W,NR_CL,tmpreg));
484                                 list.concat(taicpu.op_reg_reg(A_OR,S_W,tmpreg,GetNextReg(reg)));
485                               end;
486                             else
487                               internalerror(2017040301);
488                           end;
489 
490                           ungetcpuregister(list,NR_CX);
491                         end
492                       else if use_loop then
493                         begin
494                           getcpuregister(list,NR_CX);
495                           a_load_const_reg(list,OS_16,a,NR_CX);
496 
497                           current_asmdata.getjumplabel(hl_loop_start);
498                           a_label(list,hl_loop_start);
499 
500                           case op of
501                             OP_SHR:
502                               begin
503                                 cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
504                                 list.concat(taicpu.op_const_reg(A_SHR,S_W,1,GetNextReg(reg)));
505                                 list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg));
506                                 cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
507                               end;
508                             OP_SAR:
509                               begin
510                                 cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
511                                 list.concat(taicpu.op_const_reg(A_SAR,S_W,1,GetNextReg(reg)));
512                                 list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg));
513                                 cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
514                               end;
515                             OP_SHL:
516                               begin
517                                 cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
518                                 list.concat(taicpu.op_const_reg(A_SHL,S_W,1,reg));
519                                 list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(reg)));
520                                 cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
521                               end;
522                             else
523                               internalerror(2013030903);
524                           end;
525 
526                           ai:=Taicpu.Op_Sym(A_LOOP,S_W,hl_loop_start);
527                           ai.is_jmp:=true;
528                           list.concat(ai);
529 
530                           ungetcpuregister(list,NR_CX);
531                         end
532                       else
533                         begin
534                           for i:=1 to a do
535                             begin
536                               case op of
537                                 OP_SHR:
538                                   begin
539                                     cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
540                                     list.concat(taicpu.op_const_reg(A_SHR,S_W,1,GetNextReg(reg)));
541                                     list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg));
542                                     cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
543                                   end;
544                                 OP_SAR:
545                                   begin
546                                     cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
547                                     list.concat(taicpu.op_const_reg(A_SAR,S_W,1,GetNextReg(reg)));
548                                     list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg));
549                                     cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
550                                   end;
551                                 OP_SHL:
552                                   begin
553                                     cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
554                                     list.concat(taicpu.op_const_reg(A_SHL,S_W,1,reg));
555                                     list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(reg)));
556                                     cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
557                                   end;
558                                 else
559                                   internalerror(2013030903);
560                               end;
561                             end;
562                         end;
563                     end;
564                 end;
565               OP_ROL,OP_ROR:
566                 begin
567                   a:=a and 31;
568                   if a=0 then
569                     exit;
570                   if op=OP_ROL then
571                     begin
572                       rol_amount:=a;
573                       ror_amount:=32-a;
574                     end
575                   else
576                     begin
577                       rol_amount:=32-a;
578                       ror_amount:=a;
579                     end;
580                   case rol_amount of
581                     1,17:
582                       rox32method:=rm_unrolledleftloop;
583                     2,18:
584                       if current_settings.cputype>=cpu_386 then
585                         rox32method:=rm_fast_386
586                       else if not (cs_opt_size in current_settings.optimizerswitches) then
587                         rox32method:=rm_unrolledleftloop
588                       else
589                         rox32method:=rm_loopleft;
590                     3..8,19..24:
591                       if current_settings.cputype>=cpu_386 then
592                         rox32method:=rm_fast_386
593                       else
594                         rox32method:=rm_loopleft;
595                     15,31:
596                       rox32method:=rm_unrolledrightloop;
597                     14,30:
598                       if current_settings.cputype>=cpu_386 then
599                         rox32method:=rm_fast_386
600                       else if not (cs_opt_size in current_settings.optimizerswitches) then
601                         rox32method:=rm_unrolledrightloop
602                       else
603                         { the left loop has a smaller size }
604                         rox32method:=rm_loopleft;
605                     9..13,25..29:
606                       if current_settings.cputype>=cpu_386 then
607                         rox32method:=rm_fast_386
608                       else if not (cs_opt_size in current_settings.optimizerswitches) then
609                         rox32method:=rm_loopright
610                       else
611                         { the left loop has a smaller size }
612                         rox32method:=rm_loopleft;
613                     16:
614                       rox32method:=rm_unrolledleftloop;
615                     else
616                       internalerror(2017040601);
617                   end;
618                   case rox32method of
619                     rm_unrolledleftloop:
620                       begin
621                         if rol_amount>=16 then
622                           begin
623                             list.Concat(taicpu.op_reg_reg(A_XCHG,S_W,reg,GetNextReg(reg)));
624                             dec(rol_amount,16);
625                           end;
626                         for i:=1 to rol_amount do
627                           begin
628                             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
629                             list.Concat(taicpu.op_const_reg(A_SHL,S_W,1,GetNextReg(reg)));
630                             list.Concat(taicpu.op_const_reg(A_RCL,S_W,1,reg));
631                             list.Concat(taicpu.op_const_reg(A_ADC,S_W,0,GetNextReg(reg)));
632                             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
633                           end;
634                       end;
635                     rm_unrolledrightloop:
636                       begin
637                         if ror_amount>=16 then
638                           begin
639                             list.Concat(taicpu.op_reg_reg(A_XCHG,S_W,reg,GetNextReg(reg)));
640                             dec(ror_amount,16);
641                           end;
642                         tmpreg:=getintregister(list,OS_16);
643                         for i:=1 to ror_amount do
644                           begin
645                             a_load_reg_reg(list,OS_16,OS_16,reg,tmpreg);
646                             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
647                             list.Concat(taicpu.op_const_reg(A_SHR,S_W,1,tmpreg));
648                             list.Concat(taicpu.op_const_reg(A_RCR,S_W,1,GetNextReg(reg)));
649                             list.Concat(taicpu.op_const_reg(A_RCR,S_W,1,reg));
650                             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
651                           end;
652                       end;
653                     rm_loopleft:
654                       begin
655                         if (rol_amount>=16) and not (cs_opt_size in current_settings.optimizerswitches) then
656                           begin
657                             list.Concat(taicpu.op_reg_reg(A_XCHG,S_W,reg,GetNextReg(reg)));
658                             dec(rol_amount,16);
659                             if rol_amount=0 then
660                               exit;
661                           end;
662 
663                         getcpuregister(list,NR_CX);
664                         a_load_const_reg(list,OS_16,rol_amount,NR_CX);
665 
666                         current_asmdata.getjumplabel(hl_loop_start);
667                         a_label(list,hl_loop_start);
668 
669                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
670                         list.Concat(taicpu.op_const_reg(A_SHL,S_W,1,GetNextReg(reg)));
671                         list.Concat(taicpu.op_const_reg(A_RCL,S_W,1,reg));
672                         list.Concat(taicpu.op_const_reg(A_ADC,S_W,0,GetNextReg(reg)));
673                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
674 
675                         ai:=Taicpu.Op_Sym(A_LOOP,S_W,hl_loop_start);
676                         ai.is_jmp:=true;
677                         list.concat(ai);
678 
679                         ungetcpuregister(list,NR_CX);
680                       end;
681                     rm_loopright:
682                       begin
683                         if (ror_amount>=16) and not (cs_opt_size in current_settings.optimizerswitches) then
684                           begin
685                             list.Concat(taicpu.op_reg_reg(A_XCHG,S_W,reg,GetNextReg(reg)));
686                             dec(ror_amount,16);
687                             if ror_amount=0 then
688                               exit;
689                           end;
690 
691                         getcpuregister(list,NR_CX);
692                         a_load_const_reg(list,OS_16,ror_amount,NR_CX);
693 
694                         current_asmdata.getjumplabel(hl_loop_start);
695                         a_label(list,hl_loop_start);
696 
697                         tmpreg:=getintregister(list,OS_16);
698                         a_load_reg_reg(list,OS_16,OS_16,reg,tmpreg);
699                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
700                         list.Concat(taicpu.op_const_reg(A_SHR,S_W,1,tmpreg));
701                         list.Concat(taicpu.op_const_reg(A_RCR,S_W,1,GetNextReg(reg)));
702                         list.Concat(taicpu.op_const_reg(A_RCR,S_W,1,reg));
703                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
704 
705                         ai:=Taicpu.Op_Sym(A_LOOP,S_W,hl_loop_start);
706                         ai.is_jmp:=true;
707                         list.concat(ai);
708 
709                         ungetcpuregister(list,NR_CX);
710                       end;
711                     rm_fast_386:
712                       begin
713                         tmpreg:=getintregister(list,OS_16);
714                         a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg),tmpreg);
715                         if op=OP_ROL then
716                           begin
717                             list.Concat(taicpu.op_const_reg_reg(A_SHLD,S_W,rol_amount,reg,GetNextReg(reg)));
718                             list.Concat(taicpu.op_const_reg_reg(A_SHLD,S_W,rol_amount,tmpreg,reg));
719                           end
720                         else
721                           begin
722                             list.Concat(taicpu.op_const_reg_reg(A_SHRD,S_W,ror_amount,reg,GetNextReg(reg)));
723                             list.Concat(taicpu.op_const_reg_reg(A_SHRD,S_W,ror_amount,tmpreg,reg));
724                           end;
725                       end;
726                     else
727                       internalerror(2017040602);
728                   end;
729                 end;
730               else
731                 begin
732                   tmpreg:=getintregister(list,size);
733                   a_load_const_reg(list,size,a,tmpreg);
734                   a_op_reg_reg(list,op,size,tmpreg,reg);
735                 end;
736             end;
737           end
738         else
739           begin
740             { size <= 16-bit }
741 
742             { 8086 doesn't support 'imul reg,const', so we handle it here }
743             if (current_settings.cputype<cpu_186) and (op in [OP_MUL,OP_IMUL]) then
744               begin
745                 if op = OP_IMUL then
746                   begin
747                     if size in [OS_16,OS_S16] then
748                       ax_subreg := NR_AX
749                     else
750                       if size in [OS_8,OS_S8] then
751                         ax_subreg := NR_AL
752                       else
753                         internalerror(2013050102);
754 
755                     getcpuregister(list,NR_AX);
756 
757                     a_load_const_reg(list,size,a,ax_subreg);
758                     if size in [OS_16,OS_S16] then
759                       getcpuregister(list,NR_DX);
760                     { prefer MUL over IMUL when overflow checking is off, }
761                     { because it's faster on the 8086 & 8088              }
762                     if not (cs_check_overflow in current_settings.localswitches) then
763                       list.concat(taicpu.op_reg(A_MUL,TCgSize2OpSize[size],reg))
764                     else
765                       list.concat(taicpu.op_reg(A_IMUL,TCgSize2OpSize[size],reg));
766                     if size in [OS_16,OS_S16] then
767                       ungetcpuregister(list,NR_DX);
768                     a_load_reg_reg(list,size,size,ax_subreg,reg);
769 
770                     ungetcpuregister(list,NR_AX);
771                     exit;
772                   end
773                 else
774                   { OP_MUL should be handled specifically in the code        }
775                   { generator because of the silly register usage restraints }
776                   internalerror(200109225);
777               end
778             else
779               inherited a_op_const_reg(list, Op, size, a, reg);
780           end;
781       end;
782 
783 
784     procedure tcg8086.a_op_const_ref(list: TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
785       var
786         tmpref: treference;
787         op1,op2: TAsmOp;
788         tmpreg: TRegister;
789       begin
790         optimize_op_const(size, op, a);
791         tmpref:=ref;
792         make_simple_ref(list,tmpref);
793 
794         if size in [OS_64, OS_S64] then
795           internalerror(2013050801);
796         if size in [OS_32, OS_S32] then
797           begin
798             case Op of
799               OP_NONE :
800                 begin
801                   { Opcode is optimized away }
802                 end;
803               OP_MOVE :
804                 begin
805                   { Optimized, replaced with a simple load }
806                   a_load_const_ref(list,size,a,ref);
807                 end;
808               OP_ADD, OP_SUB:
809                 begin
810                   get_32bit_ops(op, op1, op2);
811                   { Optimization when the low 16-bits of the constant are 0 }
812                   if aint(a and $FFFF) = 0 then
813                     begin
814                       inc(tmpref.offset, 2);
815                       { use a_op_const_ref to allow the use of inc/dec }
816                       a_op_const_ref(list,op,OS_16,aint(a shr 16),tmpref);
817                     end
818                   else
819                     begin
820                       cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
821                       list.concat(taicpu.op_const_ref(op1,S_W,aint(a and $FFFF),tmpref));
822                       inc(tmpref.offset, 2);
823                       list.concat(taicpu.op_const_ref(op2,S_W,aint(a shr 16),tmpref));
824                       cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
825                     end;
826                 end;
827               OP_AND, OP_OR, OP_XOR:
828                 begin
829                   { low word operation }
830                   if aint(a and $FFFF) = aint(0) then
831                     begin
832                       case op of
833                         OP_AND:
834                           a_load_const_ref(list,OS_16,aint(0),ref);
835                         OP_OR,OP_XOR:
836                           {do nothing};
837                         else
838                           InternalError(2013100701);
839                       end;
840                     end
841                   else if aint(a and $FFFF) = aint($FFFF) then
842                     begin
843                       case op of
844                         OP_AND:
845                           {do nothing};
846                         OP_OR:
847                           a_load_const_ref(list,OS_16,aint($FFFF),tmpref);
848                         OP_XOR:
849                           list.concat(taicpu.op_ref(A_NOT,S_W,tmpref));
850                         else
851                           InternalError(2013100701);
852                       end;
853                     end
854                   else
855                     a_op_const_ref(list,op,OS_16,aint(a and $FFFF),tmpref);
856 
857                   { high word operation }
858                   inc(tmpref.offset, 2);
859                   if aint(a shr 16) = aint(0) then
860                     begin
861                       case op of
862                         OP_AND:
863                           a_load_const_ref(list,OS_16,aint(0),tmpref);
864                         OP_OR,OP_XOR:
865                           {do nothing};
866                         else
867                           InternalError(2013100701);
868                       end;
869                     end
870                   else if aint(a shr 16) = aint($FFFF) then
871                     begin
872                       case op of
873                         OP_AND:
874                           {do nothing};
875                         OP_OR:
876                           a_load_const_ref(list,OS_16,aint($FFFF),tmpref);
877                         OP_XOR:
878                           list.concat(taicpu.op_ref(A_NOT,S_W,tmpref));
879                         else
880                           InternalError(2013100701);
881                       end;
882                     end
883                   else
884                     a_op_const_ref(list,op,OS_16,aint(a shr 16),tmpref);
885                 end;
886               OP_SHR,OP_SHL,OP_SAR:
887                 begin
888                   a:=a and 31;
889                   if a=1 then
890                     begin
891                       case op of
892                         OP_SHR:
893                           begin
894                             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
895                             inc(tmpref.offset, 2);
896                             list.concat(taicpu.op_const_ref(A_SHR,S_W,1,tmpref));
897                             dec(tmpref.offset, 2);
898                             list.concat(taicpu.op_const_ref(A_RCR,S_W,1,tmpref));
899                             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
900                           end;
901                         OP_SAR:
902                           begin
903                             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
904                             inc(tmpref.offset, 2);
905                             list.concat(taicpu.op_const_ref(A_SAR,S_W,1,tmpref));
906                             dec(tmpref.offset, 2);
907                             list.concat(taicpu.op_const_ref(A_RCR,S_W,1,tmpref));
908                             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
909                           end;
910                         OP_SHL:
911                           begin
912                             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
913                             list.concat(taicpu.op_const_ref(A_SHL,S_W,1,tmpref));
914                             inc(tmpref.offset, 2);
915                             list.concat(taicpu.op_const_ref(A_RCL,S_W,1,tmpref));
916                             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
917                           end;
918                         else
919                           internalerror(2015042501);
920                       end;
921                     end
922                   else
923                     begin
924                       tmpreg:=getintregister(list,size);
925                       a_load_ref_reg(list,size,size,ref,tmpreg);
926                       a_op_const_reg(list,Op,size,a,tmpreg);
927                       a_load_reg_ref(list,size,size,tmpreg,ref);
928                     end;
929                 end;
930               OP_ROL,OP_ROR:
931                 begin
932                   tmpreg:=getintregister(list,size);
933                   a_load_ref_reg(list,size,size,ref,tmpreg);
934                   a_op_const_reg(list,Op,size,a,tmpreg);
935                   a_load_reg_ref(list,size,size,tmpreg,ref);
936                 end;
937               else
938                 internalerror(2013050802);
939             end;
940           end
941         else
942           inherited a_op_const_ref(list,Op,size,a,tmpref);
943       end;
944 
945 
946     procedure tcg8086.a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize;
947       src, dst: TRegister);
948       var
949         op1, op2: TAsmOp;
950         hl_skip, hl_loop_start: TAsmLabel;
951         ai: taicpu;
952         tmpreg: TRegister;
953       begin
954         check_register_size(size,src);
955         check_register_size(size,dst);
956         if size in [OS_64, OS_S64] then
957           internalerror(2013030902);
958         if size in [OS_32, OS_S32] then
959           begin
960             case op of
961               OP_NEG:
962                 begin
963                   if src<>dst then
964                     a_load_reg_reg(list,size,size,src,dst);
965                   list.concat(taicpu.op_reg(A_NOT, S_W, GetNextReg(dst)));
966                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
967                   list.concat(taicpu.op_reg(A_NEG, S_W, dst));
968                   list.concat(taicpu.op_const_reg(A_SBB, S_W,-1, GetNextReg(dst)));
969                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
970                 end;
971               OP_NOT:
972                 begin
973                   if src<>dst then
974                     a_load_reg_reg(list,size,size,src,dst);
975                   list.concat(taicpu.op_reg(A_NOT, S_W, dst));
976                   list.concat(taicpu.op_reg(A_NOT, S_W, GetNextReg(dst)));
977                 end;
978               OP_ADD,OP_SUB,OP_XOR,OP_OR,OP_AND:
979                 begin
980                   get_32bit_ops(op, op1, op2);
981                   if op in [OP_ADD,OP_SUB] then
982                     cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
983                   list.concat(taicpu.op_reg_reg(op1, S_W, src, dst));
984                   list.concat(taicpu.op_reg_reg(op2, S_W, GetNextReg(src), GetNextReg(dst)));
985                   if op in [OP_ADD,OP_SUB] then
986                     cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
987                 end;
988               OP_SHR,OP_SHL,OP_SAR:
989                 begin
990                   getcpuregister(list,NR_CX);
991                   a_load_reg_reg(list,size,OS_16,src,NR_CX);
992                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
993                   list.concat(taicpu.op_const_reg(A_AND,S_W,$1f,NR_CX));
994 
995                   current_asmdata.getjumplabel(hl_skip);
996                   ai:=Taicpu.Op_Sym(A_Jcc,S_NO,hl_skip);
997                   ai.SetCondition(C_Z);
998                   ai.is_jmp:=true;
999                   list.concat(ai);
1000                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1001 
1002                   current_asmdata.getjumplabel(hl_loop_start);
1003                   a_label(list,hl_loop_start);
1004 
1005                   case op of
1006                     OP_SHR:
1007                       begin
1008                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1009                         list.concat(taicpu.op_const_reg(A_SHR,S_W,1,GetNextReg(dst)));
1010                         list.concat(taicpu.op_const_reg(A_RCR,S_W,1,dst));
1011                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1012                       end;
1013                     OP_SAR:
1014                       begin
1015                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1016                         list.concat(taicpu.op_const_reg(A_SAR,S_W,1,GetNextReg(dst)));
1017                         list.concat(taicpu.op_const_reg(A_RCR,S_W,1,dst));
1018                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1019                       end;
1020                     OP_SHL:
1021                       begin
1022                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1023                         list.concat(taicpu.op_const_reg(A_SHL,S_W,1,dst));
1024                         list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(dst)));
1025                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1026                       end;
1027                     else
1028                       internalerror(2013030903);
1029                   end;
1030 
1031                   ai:=Taicpu.Op_Sym(A_LOOP,S_W,hl_loop_start);
1032                   ai.is_jmp:=true;
1033                   list.concat(ai);
1034 
1035                   a_label(list,hl_skip);
1036 
1037                   ungetcpuregister(list,NR_CX);
1038                 end;
1039               OP_ROL,OP_ROR:
1040                 begin
1041                   getcpuregister(list,NR_CX);
1042                   a_load_reg_reg(list,size,OS_16,src,NR_CX);
1043                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1044                   list.concat(taicpu.op_const_reg(A_AND,S_W,$1f,NR_CX));
1045 
1046                   current_asmdata.getjumplabel(hl_skip);
1047                   ai:=Taicpu.Op_Sym(A_Jcc,S_NO,hl_skip);
1048                   ai.SetCondition(C_Z);
1049                   ai.is_jmp:=true;
1050                   list.concat(ai);
1051                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1052 
1053                   current_asmdata.getjumplabel(hl_loop_start);
1054                   a_label(list,hl_loop_start);
1055 
1056                   case op of
1057                     OP_ROL:
1058                       begin
1059                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1060                         list.Concat(taicpu.op_const_reg(A_SHL,S_W,1,GetNextReg(dst)));
1061                         list.Concat(taicpu.op_const_reg(A_RCL,S_W,1,dst));
1062                         list.Concat(taicpu.op_const_reg(A_ADC,S_W,0,GetNextReg(dst)));
1063                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1064                       end;
1065                     OP_ROR:
1066                       begin
1067                         tmpreg:=getintregister(list,OS_16);
1068                         a_load_reg_reg(list,OS_16,OS_16,dst,tmpreg);
1069                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1070                         list.Concat(taicpu.op_const_reg(A_SHR,S_W,1,tmpreg));
1071                         list.Concat(taicpu.op_const_reg(A_RCR,S_W,1,GetNextReg(dst)));
1072                         list.Concat(taicpu.op_const_reg(A_RCR,S_W,1,dst));
1073                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1074                       end;
1075                     else
1076                       internalerror(2017042502);
1077                   end;
1078 
1079                   ai:=Taicpu.Op_Sym(A_LOOP,S_W,hl_loop_start);
1080                   ai.is_jmp:=true;
1081                   list.concat(ai);
1082 
1083                   a_label(list,hl_skip);
1084 
1085                   ungetcpuregister(list,NR_CX);
1086                 end;
1087               else
1088                 internalerror(2013030901);
1089             end;
1090           end
1091         else
1092           inherited a_op_reg_reg(list, Op, size, src, dst);
1093       end;
1094 
1095 
1096     procedure tcg8086.a_op_ref_reg(list: TAsmList; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister);
1097       var
1098         tmpref  : treference;
1099         op1, op2: TAsmOp;
1100       begin
1101         tmpref:=ref;
1102         make_simple_ref(list,tmpref);
1103         check_register_size(size,reg);
1104 
1105         if size in [OS_64, OS_S64] then
1106           internalerror(2013030902);
1107 
1108         if size in [OS_32, OS_S32] then
1109           begin
1110             case op of
1111               OP_ADD,OP_SUB,OP_XOR,OP_OR,OP_AND:
1112                 begin
1113                   get_32bit_ops(op, op1, op2);
1114                   if op in [OP_ADD,OP_SUB] then
1115                     cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1116                   list.concat(taicpu.op_ref_reg(op1, S_W, tmpref, reg));
1117                   inc(tmpref.offset, 2);
1118                   list.concat(taicpu.op_ref_reg(op2, S_W, tmpref, GetNextReg(reg)));
1119                   if op in [OP_ADD,OP_SUB] then
1120                     cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1121                 end;
1122               else
1123                 internalerror(2013050701);
1124             end;
1125           end
1126         else
1127           inherited a_op_ref_reg(list,Op,size,tmpref,reg);
1128       end;
1129 
1130 
1131     procedure tcg8086.a_op_reg_ref(list: TAsmList; Op: TOpCG; size: TCGSize; reg: TRegister; const ref: TReference);
1132       var
1133         tmpref: treference;
1134         op1,op2: TAsmOp;
1135         hl_skip, hl_loop_start: TAsmLabel;
1136         ai: taicpu;
1137         tmpreg: TRegister;
1138       begin
1139         tmpref:=ref;
1140         make_simple_ref(list,tmpref);
1141         if not (op in [OP_NEG,OP_NOT,OP_SHR,OP_SHL,OP_SAR,OP_ROL,OP_ROR]) then
1142           check_register_size(size,reg);
1143 
1144         if size in [OS_64, OS_S64] then
1145           internalerror(2013050803);
1146 
1147         if size in [OS_32, OS_S32] then
1148           begin
1149             case op of
1150               OP_NEG:
1151                 begin
1152                   if reg<>NR_NO then
1153                     internalerror(200109237);
1154                   inc(tmpref.offset, 2);
1155                   list.concat(taicpu.op_ref(A_NOT, S_W, tmpref));
1156                   dec(tmpref.offset, 2);
1157                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1158                   list.concat(taicpu.op_ref(A_NEG, S_W, tmpref));
1159                   inc(tmpref.offset, 2);
1160                   list.concat(taicpu.op_const_ref(A_SBB, S_W,-1, tmpref));
1161                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1162                 end;
1163               OP_NOT:
1164                 begin
1165                   if reg<>NR_NO then
1166                     internalerror(200109237);
1167                   list.concat(taicpu.op_ref(A_NOT, S_W, tmpref));
1168                   inc(tmpref.offset, 2);
1169                   list.concat(taicpu.op_ref(A_NOT, S_W, tmpref));
1170                 end;
1171               OP_IMUL:
1172                 begin
1173                   { this one needs a load/imul/store, which is the default }
1174                   inherited a_op_ref_reg(list,op,size,tmpref,reg);
1175                 end;
1176               OP_MUL,OP_DIV,OP_IDIV:
1177                 { special stuff, needs separate handling inside code }
1178                 { generator                                          }
1179                 internalerror(200109238);
1180               OP_ADD,OP_SUB,OP_XOR,OP_OR,OP_AND:
1181                 begin
1182                   get_32bit_ops(op, op1, op2);
1183                   if op in [OP_ADD,OP_SUB] then
1184                     cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1185                   list.concat(taicpu.op_reg_ref(op1, S_W, reg, tmpref));
1186                   inc(tmpref.offset, 2);
1187                   list.concat(taicpu.op_reg_ref(op2, S_W, GetNextReg(reg), tmpref));
1188                   if op in [OP_ADD,OP_SUB] then
1189                     cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1190                 end;
1191               OP_SHR,OP_SHL,OP_SAR:
1192                 begin
1193                   getcpuregister(list,NR_CX);
1194                   a_load_reg_reg(list,size,OS_16,reg,NR_CX);
1195                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1196                   list.concat(taicpu.op_const_reg(A_AND,S_W,$1f,NR_CX));
1197 
1198                   current_asmdata.getjumplabel(hl_skip);
1199                   ai:=Taicpu.Op_Sym(A_Jcc,S_NO,hl_skip);
1200                   ai.SetCondition(C_Z);
1201                   ai.is_jmp:=true;
1202                   list.concat(ai);
1203                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1204 
1205                   current_asmdata.getjumplabel(hl_loop_start);
1206                   a_label(list,hl_loop_start);
1207 
1208                   case op of
1209                     OP_SHR:
1210                       begin
1211                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1212                         inc(tmpref.offset, 2);
1213                         list.concat(taicpu.op_const_ref(A_SHR,S_W,1,tmpref));
1214                         dec(tmpref.offset, 2);
1215                         list.concat(taicpu.op_const_ref(A_RCR,S_W,1,tmpref));
1216                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1217                       end;
1218                     OP_SAR:
1219                       begin
1220                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1221                         inc(tmpref.offset, 2);
1222                         list.concat(taicpu.op_const_ref(A_SAR,S_W,1,tmpref));
1223                         dec(tmpref.offset, 2);
1224                         list.concat(taicpu.op_const_ref(A_RCR,S_W,1,tmpref));
1225                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1226                       end;
1227                     OP_SHL:
1228                       begin
1229                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1230                         list.concat(taicpu.op_const_ref(A_SHL,S_W,1,tmpref));
1231                         inc(tmpref.offset, 2);
1232                         list.concat(taicpu.op_const_ref(A_RCL,S_W,1,tmpref));
1233                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
1234                       end;
1235                     else
1236                       internalerror(2013030903);
1237                   end;
1238 
1239                   ai:=Taicpu.Op_Sym(A_LOOP,S_W,hl_loop_start);
1240                   ai.is_jmp:=true;
1241                   list.concat(ai);
1242 
1243                   a_label(list,hl_skip);
1244 
1245                   ungetcpuregister(list,NR_CX);
1246                 end;
1247               OP_ROL,OP_ROR:
1248                 begin
1249                   tmpreg:=getintregister(list,size);
1250                   a_load_ref_reg(list,size,size,ref,tmpreg);
1251                   a_op_reg_reg(list,Op,size,reg,tmpreg);
1252                   a_load_reg_ref(list,size,size,tmpreg,ref);
1253                 end;
1254               else
1255                 internalerror(2013050804);
1256             end;
1257           end
1258         else
1259           inherited a_op_reg_ref(list,Op,size,reg,tmpref);
1260       end;
1261 
1262 
1263     procedure tcg8086.push_const(list: TAsmList; size: tcgsize; a: tcgint);
1264       var
1265         tmpreg: TRegister;
1266       begin
1267         if not (size in [OS_16,OS_S16]) then
1268           internalerror(2013043001);
1269         if current_settings.cputype < cpu_186 then
1270           begin
1271             tmpreg:=getintregister(list,size);
1272             a_load_const_reg(list,size,a,tmpreg);
1273             list.concat(taicpu.op_reg(A_PUSH,S_W,tmpreg));
1274           end
1275         else
1276           list.concat(taicpu.op_const(A_PUSH,TCGSize2OpSize[size],a));
1277       end;
1278 
1279 
1280     procedure tcg8086.a_load_reg_cgpara(list : TAsmList;size : tcgsize;r : tregister;const cgpara : tcgpara);
1281 
1282       procedure load_para_loc(r : TRegister;paraloc : PCGParaLocation);
1283         var
1284           ref : treference;
1285         begin
1286           paramanager.allocparaloc(list,paraloc);
1287           case paraloc^.loc of
1288              LOC_REGISTER,LOC_CREGISTER:
1289                a_load_reg_reg(list,paraloc^.size,paraloc^.size,r,paraloc^.register);
1290              LOC_REFERENCE,LOC_CREFERENCE:
1291                begin
1292                   reference_reset_base(ref,paraloc^.reference.index,paraloc^.reference.offset,ctempposinvalid,2,[]);
1293                   a_load_reg_ref(list,paraloc^.size,paraloc^.size,r,ref);
1294                end;
1295              else
1296                internalerror(2002071004);
1297           end;
1298         end;
1299       var
1300         pushsize,pushsize2 : tcgsize;
1301 
1302       begin
1303         check_register_size(size,r);
1304         if use_push(cgpara) then
1305           begin
1306             if tcgsize2size[cgpara.Size] > 2 then
1307               begin
1308                 if tcgsize2size[cgpara.Size] <> 4 then
1309                   internalerror(2013031101);
1310                 if cgpara.location^.Next = nil then
1311                   begin
1312                     if tcgsize2size[cgpara.location^.size] <> 4 then
1313                       internalerror(2013031101);
1314                   end
1315                 else
1316                   begin
1317                     if tcgsize2size[cgpara.location^.size] <> 2 then
1318                       internalerror(2013031101);
1319                     if tcgsize2size[cgpara.location^.Next^.size] <> 2 then
1320                       internalerror(2013031101);
1321                     if cgpara.location^.Next^.Next <> nil then
1322                       internalerror(2013031101);
1323                   end;
1324 
1325                 if tcgsize2size[cgpara.size]>cgpara.alignment then
1326                   pushsize:=cgpara.size
1327                 else
1328                   pushsize:=int_cgsize(cgpara.alignment);
1329                 pushsize2 := int_cgsize(tcgsize2size[pushsize] - 2);
1330                 list.concat(taicpu.op_reg(A_PUSH,TCgsize2opsize[pushsize2],makeregsize(list,GetNextReg(r),pushsize2)));
1331                 list.concat(taicpu.op_reg(A_PUSH,S_W,makeregsize(list,r,OS_16)));
1332               end
1333             else
1334               begin
1335                 cgpara.check_simple_location;
1336                 if tcgsize2size[cgpara.location^.size]>cgpara.alignment then
1337                   pushsize:=cgpara.location^.size
1338                 else
1339                   pushsize:=int_cgsize(cgpara.alignment);
1340                 list.concat(taicpu.op_reg(A_PUSH,TCgsize2opsize[pushsize],makeregsize(list,r,pushsize)));
1341               end;
1342           end
1343         else
1344           begin
1345             if tcgsize2size[cgpara.Size]=4 then
1346               begin
1347                 if (cgpara.location^.Next=nil) or
1348                   (tcgsize2size[cgpara.location^.size]<>2) or
1349                   (tcgsize2size[cgpara.location^.Next^.size]<>2) or
1350                   (cgpara.location^.Next^.Next<>nil) or
1351                   (cgpara.location^.shiftval<>0) then
1352                   internalerror(2013031102);
1353                 load_para_loc(r,cgpara.Location);
1354                 load_para_loc(GetNextReg(r),cgpara.Location^.Next);
1355               end
1356             else
1357               inherited a_load_reg_cgpara(list,size,r,cgpara);
1358           end;
1359       end;
1360 
1361 
1362     procedure tcg8086.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const cgpara : tcgpara);
1363       var
1364         pushsize : tcgsize;
1365       begin
1366         if use_push(cgpara) then
1367           begin
1368             if tcgsize2size[cgpara.Size] > 2 then
1369               begin
1370                 if tcgsize2size[cgpara.Size] <> 4 then
1371                   internalerror(2013031101);
1372                 if cgpara.location^.Next = nil then
1373                   begin
1374                     if tcgsize2size[cgpara.location^.size] <> 4 then
1375                       internalerror(2013031101);
1376                   end
1377                 else
1378                   begin
1379                     if tcgsize2size[cgpara.location^.size] <> 2 then
1380                       internalerror(2013031101);
1381                     if tcgsize2size[cgpara.location^.Next^.size] <> 2 then
1382                       internalerror(2013031101);
1383                     if cgpara.location^.Next^.Next <> nil then
1384                       internalerror(2013031101);
1385                   end;
1386                 if (cgpara.alignment <> 4) and (cgpara.alignment <> 2) then
1387                   internalerror(2013031101);
1388 
1389                 push_const(list,OS_16,a shr 16);
1390                 push_const(list,OS_16,a and $FFFF);
1391               end
1392             else
1393               begin
1394                 cgpara.check_simple_location;
1395                 if tcgsize2size[cgpara.location^.size]>cgpara.alignment then
1396                   pushsize:=cgpara.location^.size
1397                 else
1398                   pushsize:=int_cgsize(cgpara.alignment);
1399                 push_const(list,pushsize,a);
1400               end;
1401           end
1402         else if (tcgsize2size[cgpara.Size]>2) and
1403                 (cgpara.location^.loc in [LOC_REGISTER,LOC_CREGISTER]) and
1404                 (cgpara.location^.Next<>nil) then
1405           begin
1406             if (tcgsize2size[cgpara.Size]<>4) or
1407                (tcgsize2size[cgpara.location^.Size]<>2) or
1408                 not (cgpara.location^.Next^.Loc in [LOC_REGISTER,LOC_CREGISTER]) or
1409                (tcgsize2size[cgpara.location^.Next^.Size]<>2) or
1410                (cgpara.location^.Next^.Next<>nil) then
1411               internalerror(2018041801);
1412             a_load_const_reg(list,cgpara.location^.size,a and $FFFF,cgpara.location^.register);
1413             a_load_const_reg(list,cgpara.location^.Next^.size,a shr 16,cgpara.location^.Next^.register);
1414           end
1415         else
1416           inherited a_load_const_cgpara(list,size,a,cgpara);
1417       end;
1418 
1419 
1420     procedure tcg8086.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const cgpara : tcgpara);
1421 
1422         procedure pushdata(paraloc:pcgparalocation;ofs:tcgint);
1423         var
1424           pushsize : tcgsize;
1425           opsize : topsize;
1426           tmpreg   : tregister;
1427           href,tmpref: treference;
1428         begin
1429           if not assigned(paraloc) then
1430             exit;
1431           if (paraloc^.loc<>LOC_REFERENCE) or
1432              (paraloc^.reference.index<>NR_STACK_POINTER_REG) or
1433              (tcgsize2size[paraloc^.size]>4) then
1434             internalerror(200501162);
1435           { Pushes are needed in reverse order, add the size of the
1436             current location to the offset where to load from. This
1437             prevents wrong calculations for the last location when
1438             the size is not a power of 2 }
1439           if assigned(paraloc^.next) then
1440             pushdata(paraloc^.next,ofs+tcgsize2size[paraloc^.size]);
1441           { Push the data starting at ofs }
1442           href:=r;
1443           inc(href.offset,ofs);
1444           if tcgsize2size[paraloc^.size]>cgpara.alignment then
1445             pushsize:=paraloc^.size
1446           else
1447             pushsize:=int_cgsize(cgpara.alignment);
1448           opsize:=TCgsize2opsize[pushsize];
1449           { for go32v2 we obtain OS_F32,
1450             but pushs is not valid, we need pushl }
1451           if opsize=S_FS then
1452             opsize:=S_W;
1453           if tcgsize2size[paraloc^.size]<cgpara.alignment then
1454             begin
1455               tmpreg:=getintregister(list,pushsize);
1456               a_load_ref_reg(list,paraloc^.size,pushsize,href,tmpreg);
1457               list.concat(taicpu.op_reg(A_PUSH,opsize,tmpreg));
1458             end
1459           else
1460             begin
1461               make_simple_ref(list,href);
1462               if tcgsize2size[pushsize] > 2 then
1463                 begin
1464                   tmpref := href;
1465                   Inc(tmpref.offset, 2);
1466                   list.concat(taicpu.op_ref(A_PUSH,TCgsize2opsize[int_cgsize(tcgsize2size[pushsize]-2)],tmpref));
1467                 end;
1468               list.concat(taicpu.op_ref(A_PUSH,opsize,href));
1469             end;
1470         end;
1471 
1472       var
1473         len : tcgint;
1474         href : treference;
1475       begin
1476         { cgpara.size=OS_NO requires a copy on the stack }
1477         if use_push(cgpara) then
1478           begin
1479             { Record copy? }
1480             if (cgpara.size in [OS_NO,OS_F64]) or (size=OS_NO) then
1481               begin
1482                 cgpara.check_simple_location;
1483                 len:=align(cgpara.intsize,cgpara.alignment);
1484                 g_stackpointer_alloc(list,len);
1485                 reference_reset_base(href,NR_STACK_POINTER_REG,0,ctempposinvalid,4,[]);
1486                 g_concatcopy(list,r,href,len);
1487               end
1488             else
1489               begin
1490                 if tcgsize2size[cgpara.size]<>tcgsize2size[size] then
1491                   internalerror(200501161);
1492                 { We need to push the data in reverse order,
1493                   therefor we use a recursive algorithm }
1494                 pushdata(cgpara.location,0);
1495               end
1496           end
1497         else
1498           inherited a_load_ref_cgpara(list,size,r,cgpara);
1499       end;
1500 
1501 
1502     procedure tcg8086.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const cgpara : tcgpara);
1503       var
1504         tmpreg : tregister;
1505         tmpref : treference;
1506       begin
1507         with r do
1508           begin
1509             if use_push(cgpara) then
1510               begin
1511                 if tcgsize2size[cgpara.Size] > 2 then
1512                   begin
1513                     if tcgsize2size[cgpara.Size] <> 4 then
1514                       internalerror(2014032401);
1515                     if cgpara.location^.Next = nil then
1516                       begin
1517                         if tcgsize2size[cgpara.location^.size] <> 4 then
1518                           internalerror(2014032401);
1519                       end
1520                     else
1521                       begin
1522                         if tcgsize2size[cgpara.location^.size] <> 2 then
1523                           internalerror(2014032401);
1524                         if tcgsize2size[cgpara.location^.Next^.size] <> 2 then
1525                           internalerror(2014032401);
1526                         if cgpara.location^.Next^.Next <> nil then
1527                           internalerror(2014032401);
1528                       end;
1529                     if cgpara.alignment > 4 then
1530                       internalerror(2014032401);
1531 
1532                     if segment<>NR_NO then
1533                       begin
1534                         list.concat(Taicpu.op_reg(A_PUSH,S_W,segment));
1535                         tmpref:=r;
1536                         tmpref.segment:=NR_NO;
1537                         tmpreg:=getaddressregister(list);
1538                         a_loadaddr_ref_reg(list,tmpref,tmpreg);
1539                         list.concat(taicpu.op_reg(A_PUSH,S_W,tmpreg));
1540                       end
1541                     else
1542                       begin
1543                         if (base=NR_NO) and (index=NR_NO) then
1544                           begin
1545                             if assigned(symbol) then
1546                               begin
1547                                 tmpref:=r;
1548                                 tmpref.refaddr:=addr_seg;
1549                                 tmpref.offset:=0;
1550                                 if current_settings.cputype < cpu_186 then
1551                                   begin
1552                                     tmpreg:=getaddressregister(list);
1553                                     a_load_ref_reg(list,OS_16,OS_16,tmpref,tmpreg);
1554                                     list.concat(taicpu.op_reg(A_PUSH,S_W,tmpreg));
1555                                   end
1556                                 else
1557                                   list.concat(Taicpu.Op_ref(A_PUSH,S_W,tmpref));
1558                                 if current_settings.cputype < cpu_186 then
1559                                   begin
1560                                     tmpreg:=getaddressregister(list);
1561                                     a_loadaddr_ref_reg(list,r,tmpreg);
1562                                     list.concat(taicpu.op_reg(A_PUSH,S_W,tmpreg));
1563                                   end
1564                                 else
1565                                   list.concat(Taicpu.Op_sym_ofs(A_PUSH,S_W,symbol,offset));
1566                               end
1567                             else
1568                               internalerror(2014032402);
1569                           end
1570                         else if assigned(symbol) then
1571                           begin
1572                             reference_reset_symbol(tmpref,r.symbol,0,r.alignment,r.volatility);
1573                             tmpref.refaddr:=addr_seg;
1574                             if current_settings.cputype < cpu_186 then
1575                               begin
1576                                 tmpreg:=getaddressregister(list);
1577                                 a_load_ref_reg(list,OS_16,OS_16,tmpref,tmpreg);
1578                                 list.concat(taicpu.op_reg(A_PUSH,S_W,tmpreg));
1579                               end
1580                             else
1581                               list.concat(Taicpu.Op_ref(A_PUSH,S_W,tmpref));
1582                             tmpreg:=getaddressregister(list);
1583                             a_loadaddr_ref_reg(list,r,tmpreg);
1584                             list.concat(taicpu.op_reg(A_PUSH,S_W,tmpreg));
1585                           end
1586                         else if base=NR_BP then
1587                           begin
1588                             list.concat(Taicpu.op_reg(A_PUSH,S_W,NR_SS));
1589                             tmpreg:=getaddressregister(list);
1590                             a_loadaddr_ref_reg(list,r,tmpreg);
1591                             list.concat(taicpu.op_reg(A_PUSH,S_W,tmpreg));
1592                           end
1593                         else
1594                           internalerror(2014032403);
1595                       end;
1596                   end
1597                 else
1598                   begin
1599                     cgpara.check_simple_location;
1600                     tmpref:=r;
1601                     tmpref.segment:=NR_NO;
1602                     with tmpref do
1603                       begin
1604                         if (base=NR_NO) and (index=NR_NO) then
1605                           begin
1606                             if assigned(symbol) then
1607                               begin
1608                                 if current_settings.cputype < cpu_186 then
1609                                   begin
1610                                     tmpreg:=getaddressregister(list);
1611                                     a_loadaddr_ref_reg(list,tmpref,tmpreg);
1612                                     list.concat(taicpu.op_reg(A_PUSH,S_W,tmpreg));
1613                                   end
1614                                 else
1615                                   list.concat(Taicpu.Op_sym_ofs(A_PUSH,S_W,symbol,offset));
1616                               end
1617                             else
1618                               push_const(list,OS_16,offset);
1619                           end
1620                         else if (base=NR_NO) and (index<>NR_NO) and
1621                                 (offset=0) and (scalefactor=0) and (symbol=nil) then
1622                           list.concat(Taicpu.Op_reg(A_PUSH,S_W,index))
1623                         else if (base<>NR_NO) and (index=NR_NO) and
1624                                 (offset=0) and (symbol=nil) then
1625                           list.concat(Taicpu.Op_reg(A_PUSH,S_W,base))
1626                         else
1627                           begin
1628                             tmpreg:=getaddressregister(list);
1629                             a_loadaddr_ref_reg(list,tmpref,tmpreg);
1630                             list.concat(taicpu.op_reg(A_PUSH,S_W,tmpreg));
1631                           end;
1632                       end;
1633                 end;
1634               end
1635             else
1636               inherited a_loadaddr_ref_cgpara(list,r,cgpara);
1637           end;
1638       end;
1639 
1640 
1641     procedure tcg8086.a_load_const_reg(list : TAsmList; tosize: tcgsize; a : tcgint;reg : tregister);
1642     begin
1643       check_register_size(tosize,reg);
1644       if tosize in [OS_S32,OS_32] then
1645         begin
1646           list.concat(taicpu.op_const_reg(A_MOV,S_W,longint(a and $ffff),reg));
1647           list.concat(taicpu.op_const_reg(A_MOV,S_W,longint(a shr 16),GetNextReg(reg)));
1648         end
1649       else
1650         list.concat(taicpu.op_const_reg(A_MOV,TCGSize2OpSize[tosize],a,reg));
1651     end;
1652 
1653 
1654     procedure tcg8086.a_load_const_ref(list : TAsmList; tosize: tcgsize; a : tcgint;const ref : treference);
1655       var
1656         tmpref : treference;
1657       begin
1658         tmpref:=ref;
1659         make_simple_ref(list,tmpref);
1660 
1661         if tosize in [OS_S32,OS_32] then
1662           begin
1663             a_load_const_ref(list,OS_16,longint(a and $ffff),tmpref);
1664             inc(tmpref.offset,2);
1665             a_load_const_ref(list,OS_16,longint(a shr 16),tmpref);
1666           end
1667         else
1668           list.concat(taicpu.op_const_ref(A_MOV,TCGSize2OpSize[tosize],a,tmpref));
1669       end;
1670 
1671 
1672 
1673     procedure tcg8086.a_load_reg_ref(list : TAsmList;fromsize,tosize: tcgsize; reg : tregister;const ref : treference);
1674       var
1675         tmpreg  : tregister;
1676         tmpref  : treference;
1677       begin
1678         tmpref:=ref;
1679         make_simple_ref(list,tmpref);
1680         check_register_size(fromsize,reg);
1681 
1682         case tosize of
1683           OS_8,OS_S8:
1684             if fromsize in [OS_8,OS_S8] then
1685               list.concat(taicpu.op_reg_ref(A_MOV, S_B, reg, tmpref))
1686             else
1687               internalerror(2013030310);
1688           OS_16,OS_S16:
1689             case fromsize of
1690               OS_8,OS_S8:
1691                 begin
1692                   tmpreg:=getintregister(list,tosize);
1693                   a_load_reg_reg(list,fromsize,tosize,reg,tmpreg);
1694                   a_load_reg_ref(list,tosize,tosize,tmpreg,tmpref);
1695                 end;
1696               OS_16,OS_S16:
1697                 begin
1698                   list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
1699                 end;
1700               else
1701                 internalerror(2013030312);
1702             end;
1703           OS_32,OS_S32:
1704             case fromsize of
1705               OS_8,OS_S8,OS_S16:
1706                 begin
1707                   tmpreg:=getintregister(list,tosize);
1708                   a_load_reg_reg(list,fromsize,tosize,reg,tmpreg);
1709                   a_load_reg_ref(list,tosize,tosize,tmpreg,tmpref);
1710                 end;
1711               OS_16:
1712                 begin
1713                   list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
1714                   inc(tmpref.offset, 2);
1715                   list.concat(taicpu.op_const_ref(A_MOV, S_W, 0, tmpref));
1716                 end;
1717               OS_32,OS_S32:
1718                 begin
1719                   list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
1720                   inc(tmpref.offset, 2);
1721                   list.concat(taicpu.op_reg_ref(A_MOV, S_W, GetNextReg(reg), tmpref));
1722                 end;
1723               else
1724                 internalerror(2013030313);
1725             end;
1726           else
1727             internalerror(2013030311);
1728         end;
1729       end;
1730 
1731 
1732     procedure tcg8086.a_load_ref_reg_internal(list : TAsmList;fromsize,tosize: tcgsize;const ref : treference;reg : tregister;isdirect:boolean);
1733 
1734         procedure add_mov(instr: Taicpu);
1735           begin
1736             { Notify the register allocator that we have written a move instruction so
1737               it can try to eliminate it. }
1738             if (instr.oper[0]^.reg<>current_procinfo.framepointer) and (instr.oper[0]^.reg<>NR_STACK_POINTER_REG) then
1739               add_move_instruction(instr);
1740             list.concat(instr);
1741           end;
1742 
1743       var
1744         tmpref  : treference;
1745       begin
1746         tmpref:=ref;
1747         make_simple_ref(list,tmpref,isdirect);
1748         check_register_size(tosize,reg);
1749 
1750         if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
1751           internalerror(2011021307);
1752 {        if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
1753           fromsize:=tosize;}
1754 
1755         case tosize of
1756           OS_8,OS_S8:
1757             if fromsize in [OS_8,OS_S8] then
1758               list.concat(taicpu.op_ref_reg(A_MOV, S_B, tmpref, reg))
1759             else
1760               internalerror(2013030210);
1761           OS_16,OS_S16:
1762             case fromsize of
1763               OS_8:
1764                 begin
1765                   if current_settings.cputype>=cpu_386 then
1766                     list.concat(taicpu.op_ref_reg(A_MOVZX, S_BW, tmpref, reg))
1767                   else
1768                     begin
1769                       reg := makeregsize(list, reg, OS_8);
1770                       list.concat(taicpu.op_ref_reg(A_MOV, S_B, tmpref, reg));
1771                       setsubreg(reg, R_SUBH);
1772                       list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg));
1773                       makeregsize(list, reg, OS_16);
1774                     end;
1775                 end;
1776               OS_S8:
1777                 begin
1778                   if current_settings.cputype>=cpu_386 then
1779                     list.concat(taicpu.op_ref_reg(A_MOVSX, S_BW, tmpref, reg))
1780                   else
1781                     begin
1782                       getcpuregister(list, NR_AX);
1783                       list.concat(taicpu.op_ref_reg(A_MOV, S_B, tmpref, NR_AL));
1784                       list.concat(taicpu.op_none(A_CBW));
1785                       ungetcpuregister(list, NR_AX);
1786                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_AX, reg));
1787                     end;
1788                 end;
1789               OS_16,OS_S16:
1790                 list.concat(taicpu.op_ref_reg(A_MOV, S_W, tmpref, reg));
1791               else
1792                 internalerror(2013030212);
1793             end;
1794           OS_32,OS_S32:
1795             case fromsize of
1796               OS_8:
1797                 begin
1798                   list.concat(taicpu.op_const_reg(A_MOV,S_W,0,GetNextReg(reg)));
1799                   if current_settings.cputype>=cpu_386 then
1800                     list.concat(taicpu.op_ref_reg(A_MOVZX, S_BW, tmpref, reg))
1801                   else
1802                     begin
1803                       reg := makeregsize(list, reg, OS_8);
1804                       list.concat(taicpu.op_ref_reg(A_MOV, S_B, tmpref, reg));
1805                       setsubreg(reg, R_SUBH);
1806                       list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg));
1807                       makeregsize(list, reg, OS_16);
1808                     end;
1809                 end;
1810               OS_S8:
1811                 begin
1812                   getcpuregister(list, NR_AX);
1813                   list.concat(taicpu.op_ref_reg(A_MOV, S_B, tmpref, NR_AL));
1814                   getcpuregister(list, NR_DX);
1815                   list.concat(taicpu.op_none(A_CBW));
1816                   list.concat(taicpu.op_none(A_CWD));
1817                   ungetcpuregister(list, NR_AX);
1818                   add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_AX, reg));
1819                   ungetcpuregister(list, NR_DX);
1820                   add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_DX, GetNextReg(reg)));
1821                 end;
1822               OS_16:
1823                 begin
1824                   list.concat(taicpu.op_ref_reg(A_MOV, S_W, tmpref, reg));
1825                   list.concat(taicpu.op_const_reg(A_MOV,S_W,0,GetNextReg(reg)));
1826                 end;
1827               OS_S16:
1828                 begin
1829                   getcpuregister(list, NR_AX);
1830                   list.concat(taicpu.op_ref_reg(A_MOV, S_W, tmpref, NR_AX));
1831                   getcpuregister(list, NR_DX);
1832                   list.concat(taicpu.op_none(A_CWD));
1833                   ungetcpuregister(list, NR_AX);
1834                   add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_AX, reg));
1835                   ungetcpuregister(list, NR_DX);
1836                   add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_DX, GetNextReg(reg)));
1837                 end;
1838               OS_32,OS_S32:
1839                 begin
1840                   list.concat(taicpu.op_ref_reg(A_MOV, S_W, tmpref, reg));
1841                   inc(tmpref.offset, 2);
1842                   list.concat(taicpu.op_ref_reg(A_MOV, S_W, tmpref, GetNextReg(reg)));
1843                 end;
1844               else
1845                 internalerror(2013030213);
1846             end;
1847           else
1848             internalerror(2013030211);
1849         end;
1850       end;
1851 
1852 
1853     procedure tcg8086.a_load_reg_reg(list : TAsmList;fromsize,tosize: tcgsize;reg1,reg2 : tregister);
1854 
1855         procedure add_mov(instr: Taicpu);
1856           begin
1857             { Notify the register allocator that we have written a move instruction so
1858               it can try to eliminate it. }
1859             if (instr.oper[0]^.reg<>current_procinfo.framepointer) and (instr.oper[0]^.reg<>NR_STACK_POINTER_REG) then
1860               add_move_instruction(instr);
1861             list.concat(instr);
1862           end;
1863 
1864       begin
1865         check_register_size(fromsize,reg1);
1866         check_register_size(tosize,reg2);
1867 
1868         if tcgsize2size[fromsize]>tcgsize2size[tosize] then
1869           begin
1870             if tosize in [OS_32, OS_S32] then
1871               internalerror(2013031801);
1872             reg1:=makeregsize(list,reg1,tosize);
1873             fromsize:=tosize;
1874           end;
1875 
1876         if (reg1<>reg2) or (fromsize<>tosize) then
1877           begin
1878             case tosize of
1879               OS_8,OS_S8:
1880                 if fromsize in [OS_8,OS_S8] then
1881                   begin
1882                     if reg1<>reg2 then
1883                       add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, reg2));
1884                   end
1885                 else
1886                   internalerror(2013030210);
1887               OS_16,OS_S16:
1888                 case fromsize of
1889                   OS_8:
1890                     begin
1891                       if current_settings.cputype>=cpu_386 then
1892                         add_mov(taicpu.op_reg_reg(A_MOVZX, S_BW, reg1, reg2))
1893                       else
1894                         begin
1895                           reg2 := makeregsize(list, reg2, OS_8);
1896                           if reg1<>reg2 then
1897                             add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, reg2));
1898                           setsubreg(reg2,R_SUBH);
1899                           list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg2));
1900                           makeregsize(list, reg2, OS_16);
1901                         end;
1902                     end;
1903                   OS_S8:
1904                     begin
1905                       if current_settings.cputype>=cpu_386 then
1906                         add_mov(taicpu.op_reg_reg(A_MOVSX, S_BW, reg1, reg2))
1907                       else
1908                         begin
1909                           getcpuregister(list, NR_AX);
1910                           add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, NR_AL));
1911                           list.concat(taicpu.op_none(A_CBW));
1912                           ungetcpuregister(list, NR_AX);
1913                           add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_AX, reg2));
1914                         end;
1915                     end;
1916                   OS_16,OS_S16:
1917                     begin
1918                       if reg1<>reg2 then
1919                         add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, reg2));
1920                     end
1921                   else
1922                     internalerror(2013030212);
1923                 end;
1924               OS_32,OS_S32:
1925                 case fromsize of
1926                   OS_8:
1927                     begin
1928                       list.concat(taicpu.op_const_reg(A_MOV, S_W, 0, GetNextReg(reg2)));
1929                       if current_settings.cputype>=cpu_386 then
1930                         add_mov(taicpu.op_reg_reg(A_MOVZX, S_BW, reg1, reg2))
1931                       else
1932                         begin
1933                           reg2 := makeregsize(list, reg2, OS_8);
1934                           if reg1<>reg2 then
1935                             add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, reg2));
1936                           setsubreg(reg2,R_SUBH);
1937                           list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg2));
1938                           makeregsize(list, reg2, OS_16);
1939                         end;
1940                     end;
1941                   OS_S8:
1942                     begin
1943                       getcpuregister(list, NR_AX);
1944                       add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, NR_AL));
1945                       getcpuregister(list, NR_DX);
1946                       list.concat(taicpu.op_none(A_CBW));
1947                       list.concat(taicpu.op_none(A_CWD));
1948                       ungetcpuregister(list, NR_AX);
1949                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_AX, reg2));
1950                       ungetcpuregister(list, NR_DX);
1951                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_DX, GetNextReg(reg2)));
1952                     end;
1953                   OS_16:
1954                     begin
1955                       if reg1<>reg2 then
1956                         add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, reg2));
1957                       list.concat(taicpu.op_const_reg(A_MOV,S_W,0,GetNextReg(reg2)));
1958                     end;
1959                   OS_S16:
1960                     begin
1961                       getcpuregister(list, NR_AX);
1962                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, NR_AX));
1963                       getcpuregister(list, NR_DX);
1964                       list.concat(taicpu.op_none(A_CWD));
1965                       ungetcpuregister(list, NR_AX);
1966                       if reg1<>reg2 then
1967                         add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_AX, reg2));
1968                       ungetcpuregister(list, NR_DX);
1969                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_DX, GetNextReg(reg2)));
1970                     end;
1971                   OS_32,OS_S32:
1972                     begin
1973                       if reg1<>reg2 then
1974                         begin
1975                           add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, reg2));
1976                           add_mov(taicpu.op_reg_reg(A_MOV, S_W, GetNextReg(reg1), GetNextReg(reg2)));
1977                         end;
1978                     end;
1979                   else
1980                     internalerror(2013030213);
1981                 end;
1982               else
1983                 internalerror(2013030211);
1984             end;
1985           end;
1986       end;
1987 
1988 
1989     procedure tcg8086.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
1990       var
1991         hl_skip: TAsmLabel;
1992       begin
1993         if size in [OS_32, OS_S32] then
1994           begin
1995             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
1996             if (longint(a shr 16) = 0) then
1997               list.concat(taicpu.op_reg_reg(A_TEST,S_W,GetNextReg(reg),GetNextReg(reg)))
1998             else
1999               list.concat(taicpu.op_const_reg(A_CMP,S_W,longint(a shr 16),GetNextReg(reg)));
2000             current_asmdata.getjumplabel(hl_skip);
2001             gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
2002 
2003             if (longint(a and $ffff) = 0) then
2004               list.concat(taicpu.op_reg_reg(A_TEST,S_W,reg,reg))
2005             else
2006               list.concat(taicpu.op_const_reg(A_CMP,S_W,longint(a and $ffff),reg));
2007             gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
2008             a_label(list,hl_skip);
2009             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2010           end
2011         else
2012           inherited a_cmp_const_reg_label(list, size, cmp_op, a, reg, l);
2013       end;
2014 
2015 
2016     procedure tcg8086.a_cmp_const_ref_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; const ref: treference; l: tasmlabel);
2017       var
2018         tmpref: treference;
2019         hl_skip: TAsmLabel;
2020       begin
2021         if size in [OS_32, OS_S32] then
2022           begin
2023             tmpref:=ref;
2024             make_simple_ref(list,tmpref);
2025             inc(tmpref.offset,2);
2026             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2027             list.concat(taicpu.op_const_ref(A_CMP,S_W,longint(a shr 16),tmpref));
2028             current_asmdata.getjumplabel(hl_skip);
2029             gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
2030             dec(tmpref.offset,2);
2031             list.concat(taicpu.op_const_ref(A_CMP,S_W,longint(a and $ffff),tmpref));
2032             gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
2033             a_label(list,hl_skip);
2034             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2035           end
2036         else
2037           inherited a_cmp_const_ref_label(list, size, cmp_op, a, ref, l);
2038       end;
2039 
2040 
2041     procedure tcg8086.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);
2042       var
2043         hl_skip: TAsmLabel;
2044       begin
2045         if size in [OS_32, OS_S32] then
2046           begin
2047             check_register_size(size,reg1);
2048             check_register_size(size,reg2);
2049             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2050             list.concat(taicpu.op_reg_reg(A_CMP,S_W,GetNextReg(reg1),GetNextReg(reg2)));
2051             current_asmdata.getjumplabel(hl_skip);
2052             gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
2053             list.concat(taicpu.op_reg_reg(A_CMP,S_W,reg1,reg2));
2054             gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
2055             a_label(list,hl_skip);
2056             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2057           end
2058         else
2059           inherited a_cmp_reg_reg_label(list, size, cmp_op, reg1, reg2, l);
2060       end;
2061 
2062 
2063     procedure tcg8086.a_cmp_ref_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; const ref: treference; reg: tregister; l: tasmlabel);
2064       var
2065         tmpref: treference;
2066         hl_skip: TAsmLabel;
2067       begin
2068         if size in [OS_32, OS_S32] then
2069           begin
2070             tmpref:=ref;
2071             make_simple_ref(list,tmpref);
2072             check_register_size(size,reg);
2073             inc(tmpref.offset,2);
2074             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2075             list.concat(taicpu.op_ref_reg(A_CMP,S_W,tmpref,GetNextReg(reg)));
2076             current_asmdata.getjumplabel(hl_skip);
2077             gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
2078             dec(tmpref.offset,2);
2079             list.concat(taicpu.op_ref_reg(A_CMP,S_W,tmpref,reg));
2080             gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
2081             a_label(list,hl_skip);
2082             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2083           end
2084         else
2085           inherited a_cmp_ref_reg_label(list, size, cmp_op, ref, reg, l);
2086       end;
2087 
2088 
2089     procedure tcg8086.a_cmp_reg_ref_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg: tregister; const ref: treference; l: tasmlabel);
2090       var
2091         tmpref: treference;
2092         hl_skip: TAsmLabel;
2093       begin
2094         if size in [OS_32, OS_S32] then
2095           begin
2096             tmpref:=ref;
2097             make_simple_ref(list,tmpref);
2098             check_register_size(size,reg);
2099             inc(tmpref.offset,2);
2100             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2101             list.concat(taicpu.op_reg_ref(A_CMP,S_W,GetNextReg(reg),tmpref));
2102             current_asmdata.getjumplabel(hl_skip);
2103             gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
2104             dec(tmpref.offset,2);
2105             list.concat(taicpu.op_reg_ref(A_CMP,S_W,reg,tmpref));
2106             gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
2107             a_label(list,hl_skip);
2108             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2109           end
2110         else
2111           inherited a_cmp_reg_ref_label(list, size, cmp_op, reg, ref, l);
2112       end;
2113 
2114 
2115     procedure tcg8086.gen_cmp32_jmp1(list: TAsmList; cmp_op: topcmp; l_skip, l_target: TAsmLabel);
2116       begin
2117         case cmp_op of
2118           OC_EQ:
2119             a_jmp_cond(list, OC_NE, l_skip);
2120           OC_NE:
2121             a_jmp_cond(list, OC_NE, l_target);
2122           OC_GT,OC_GTE:
2123             begin
2124               a_jmp_cond(list, OC_GT, l_target);
2125               a_jmp_cond(list, OC_LT, l_skip);
2126             end;
2127           OC_LT,OC_LTE:
2128             begin
2129               a_jmp_cond(list, OC_LT, l_target);
2130               a_jmp_cond(list, OC_GT, l_skip);
2131             end;
2132           OC_B,OC_BE:
2133             begin
2134               a_jmp_cond(list, OC_B, l_target);
2135               a_jmp_cond(list, OC_A, l_skip);
2136             end;
2137           OC_A,OC_AE:
2138             begin
2139               a_jmp_cond(list, OC_A, l_target);
2140               a_jmp_cond(list, OC_B, l_skip);
2141             end;
2142           else
2143             internalerror(2014010305);
2144         end;
2145       end;
2146 
2147     procedure tcg8086.gen_cmp32_jmp2(list: TAsmList; cmp_op: topcmp; l_skip, l_target: TAsmLabel);
2148       begin
2149         case cmp_op of
2150           OC_EQ:
2151             a_jmp_cond(list, OC_EQ, l_target);
2152           OC_GT:
2153             a_jmp_cond(list, OC_A, l_target);
2154           OC_LT:
2155             a_jmp_cond(list, OC_B, l_target);
2156           OC_GTE:
2157             a_jmp_cond(list, OC_AE, l_target);
2158           OC_LTE:
2159             a_jmp_cond(list, OC_BE, l_target);
2160           OC_NE:
2161             a_jmp_cond(list, OC_NE, l_target);
2162           OC_BE:
2163             a_jmp_cond(list, OC_BE, l_target);
2164           OC_B:
2165             a_jmp_cond(list, OC_B, l_target);
2166           OC_AE:
2167             a_jmp_cond(list, OC_AE, l_target);
2168           OC_A:
2169             a_jmp_cond(list, OC_A, l_target);
2170           else
2171             internalerror(2014010306);
2172         end;
2173       end;
2174 
2175 
2176     procedure tcg8086.g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister);
2177       var
2178         ai : taicpu;
2179         hreg16 : tregister;
2180         hl_skip: TAsmLabel;
2181         invf: TResFlags;
2182         tmpsize: TCgSize;
2183         tmpopsize: topsize;
2184       begin
2185         { optimized case for the carry flag, using ADC/RCL }
2186         if f in [F_C,F_B,F_FB] then
2187           begin
2188             case size of
2189               OS_8,OS_S8:
2190                 begin
2191                   tmpsize:=OS_8;
2192                   tmpopsize:=S_B;
2193                 end;
2194               OS_16,OS_S16,OS_32,OS_S32:
2195                 begin
2196                   tmpsize:=OS_16;
2197                   tmpopsize:=S_W;
2198                 end;
2199               else
2200                 internalerror(2013123101);
2201             end;
2202             list.concat(Taicpu.op_const_reg(A_MOV, tmpopsize, 0, reg));
2203             hl_skip:=nil;
2204             if f=F_FB then
2205               begin
2206                 current_asmdata.getjumplabel(hl_skip);
2207                 ai:=Taicpu.op_sym(A_Jcc,S_NO,hl_skip);
2208                 ai.SetCondition(C_P);
2209                 ai.is_jmp:=true;
2210                 list.concat(ai);
2211               end;
2212             { RCL is faster than ADC on 8086/8088. On the 80286, it is
2213               equally fast and it also has the same size. In these cases,
2214               we still prefer it over ADC, because it's a better choice in
2215               case the register is spilled. }
2216             if (cs_opt_size in current_settings.optimizerswitches) or
2217                (current_settings.optimizecputype<=cpu_286) then
2218               list.concat(Taicpu.op_const_reg(A_RCL, tmpopsize, 1, reg))
2219             else
2220               { ADC is much faster on the 386. }
2221               list.concat(Taicpu.op_reg_reg(A_ADC, tmpopsize, reg, reg));
2222             if f=F_FB then
2223               a_label(list,hl_skip);
2224             a_load_reg_reg(list,tmpsize,size,reg,reg);
2225           end
2226         { optimized case for the inverted carry flag, using SBB }
2227         else if f in [F_NC,F_AE,F_FAE] then
2228           begin
2229             case size of
2230               OS_8,OS_S8:
2231                 begin
2232                   tmpsize:=OS_8;
2233                   list.concat(Taicpu.op_const_reg(A_MOV, S_B, 1, reg));
2234                   list.concat(Taicpu.op_const_reg(A_SBB, S_B, 0, reg));
2235                 end;
2236               OS_16,OS_S16,OS_32,OS_S32:
2237                 begin
2238                   tmpsize:=OS_16;
2239                   list.concat(Taicpu.op_const_reg(A_MOV, S_W, 1, reg));
2240                   list.concat(Taicpu.op_const_reg(A_SBB, S_W, 0, reg));
2241                 end;
2242               else
2243                 internalerror(2013123101);
2244             end;
2245             a_load_reg_reg(list,tmpsize,size,reg,reg);
2246           end
2247         else
2248           begin
2249             invf := f;
2250             inverse_flags(invf);
2251 
2252             case size of
2253               OS_8,OS_S8:
2254                 begin
2255                   tmpsize:=OS_8;
2256                   list.concat(Taicpu.op_const_reg(A_MOV, S_B, 0, reg));
2257                 end;
2258               OS_16,OS_S16,OS_32,OS_S32:
2259                 begin
2260                   tmpsize:=OS_16;
2261                   list.concat(Taicpu.op_const_reg(A_MOV, S_W, 0, reg));
2262                 end;
2263               else
2264                 internalerror(2013123101);
2265             end;
2266 
2267             current_asmdata.getjumplabel(hl_skip);
2268             { we can't just forward invf to a_jmp_flags for FA,FAE,FB and FBE, because
2269               in the case of NaNs:
2270                not(F_FA )<>F_FBE
2271                not(F_FAE)<>F_FB
2272                not(F_FB )<>F_FAE
2273                not(F_FBE)<>F_FA
2274             }
2275             case f of
2276               F_FA:
2277                 invf:=FPUFlags2Flags[invf];
2278               F_FAE,F_FB:
2279                 { F_FAE and F_FB are handled above, using ADC/RCL/SBB }
2280                 internalerror(2015102101);
2281               F_FBE:
2282                 begin
2283                   ai:=Taicpu.op_sym(A_Jcc,S_NO,hl_skip);
2284                   ai.SetCondition(C_P);
2285                   ai.is_jmp:=true;
2286                   list.concat(ai);
2287                   invf:=FPUFlags2Flags[invf];
2288                 end;
2289             end;
2290             a_jmp_flags(list,invf,hl_skip);
2291 
2292             { 16-bit INC is shorter than 8-bit }
2293             hreg16:=makeregsize(list,reg,OS_16);
2294             list.concat(Taicpu.op_reg(A_INC, S_W, hreg16));
2295             makeregsize(list,hreg16,tmpsize);
2296 
2297             a_label(list,hl_skip);
2298 
2299             a_load_reg_reg(list,tmpsize,size,reg,reg);
2300           end;
2301       end;
2302 
2303 
2304     procedure tcg8086.g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference);
2305       var
2306         tmpreg : tregister;
2307         tmpregsize: TCgSize;
2308         tmpref: treference;
2309       begin
2310         if size in [OS_8,OS_S8,OS_16,OS_S16] then
2311           tmpregsize:=size
2312         else
2313           tmpregsize:=OS_16;
2314         tmpreg:=getintregister(list,tmpregsize);
2315         g_flags2reg(list,tmpregsize,f,tmpreg);
2316 
2317         tmpref:=ref;
2318         make_simple_ref(list,tmpref);
2319         if size in [OS_64,OS_S64] then
2320           begin
2321             a_load_reg_ref(list,tmpregsize,OS_32,tmpreg,tmpref);
2322             inc(tmpref.offset,4);
2323             a_load_const_ref(list,OS_32,0,tmpref);
2324           end
2325         else
2326           a_load_reg_ref(list,tmpregsize,size,tmpreg,tmpref);
2327       end;
2328 
2329 
2330     procedure tcg8086.g_stackpointer_alloc(list : TAsmList;localsize: longint);
2331       begin
2332         if cs_check_stack in current_settings.localswitches then
2333           begin
2334             cg.getcpuregister(list,NR_AX);
2335             cg.a_load_const_reg(list,OS_16, localsize,NR_AX);
2336             cg.a_call_name(list,'FPC_STACKCHECK_I8086',false);
2337             cg.ungetcpuregister(list, NR_AX);
2338           end;
2339         if localsize>0 then
2340           list.concat(Taicpu.Op_const_reg(A_SUB,S_W,localsize,NR_STACK_POINTER_REG));
2341       end;
2342 
2343 
2344     procedure tcg8086.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
2345       var
2346         stacksize : longint;
2347         ret_instr: TAsmOp;
2348         sp_moved : boolean;
2349 
2350       procedure maybe_move_sp;
2351         var
2352           ref : treference;
2353         begin
2354           if sp_moved then
2355             exit;
2356           if not(pi_has_open_array_parameter in current_procinfo.flags) then
2357             exit;
2358           { Restore SP position before SP change }
2359           if current_settings.x86memorymodel=mm_huge then
2360             stacksize:=stacksize + 2;
2361           reference_reset_base(ref,NR_BP,-stacksize,ctempposinvalid,2,[]);
2362           list.concat(Taicpu.op_ref_reg(A_LEA,S_W,ref,NR_SP));
2363           sp_moved:=true;
2364         end;
2365 
2366       begin
2367         if is_proc_far(current_procinfo.procdef) then
2368           ret_instr:=A_RETF
2369         else
2370           ret_instr:=A_RET;
2371         { MMX needs to call EMMS }
2372         if assigned(rg[R_MMXREGISTER]) and
2373            (rg[R_MMXREGISTER].uses_registers) then
2374           list.concat(Taicpu.op_none(A_EMMS,S_NO));
2375 
2376         sp_moved:=false;
2377         { remove stackframe }
2378         if not nostackframe then
2379           begin
2380             stacksize:=current_procinfo.calc_stackframe_size;
2381             if (target_info.stackalign>4) and
2382                ((stacksize <> 0) or
2383                 (pi_do_call in current_procinfo.flags) or
2384                 { can't detect if a call in this case -> use nostackframe }
2385                 { if you (think you) know what you are doing              }
2386                 (po_assembler in current_procinfo.procdef.procoptions)) then
2387               stacksize := align(stacksize+sizeof(aint),target_info.stackalign) - sizeof(aint);
2388             if (po_exports in current_procinfo.procdef.procoptions) and
2389                (target_info.system=system_i8086_win16) then
2390               begin
2391                 maybe_move_sp;
2392                 list.concat(Taicpu.Op_reg(A_POP,S_W,NR_DI));
2393                 list.concat(Taicpu.Op_reg(A_POP,S_W,NR_SI));
2394               end;
2395             if ((current_settings.x86memorymodel=mm_huge) and
2396                 not (po_interrupt in current_procinfo.procdef.procoptions)) or
2397                ((po_exports in current_procinfo.procdef.procoptions) and
2398                 (target_info.system=system_i8086_win16)) then
2399               begin
2400                 maybe_move_sp;
2401                 list.concat(Taicpu.Op_reg(A_POP,S_W,NR_DS));
2402               end;
2403             if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
2404               begin
2405                 if (stacksize<>0) then
2406                   cg.a_op_const_reg(list,OP_ADD,OS_ADDR,stacksize,current_procinfo.framepointer);
2407               end
2408             else
2409               begin
2410                 generate_leave(list);
2411                 if ((ts_x86_far_procs_push_odd_bp in current_settings.targetswitches) or
2412                     ((po_exports in current_procinfo.procdef.procoptions) and
2413                      (target_info.system=system_i8086_win16))) and
2414                     is_proc_far(current_procinfo.procdef) then
2415                   cg.a_op_const_reg(list,OP_SUB,OS_ADDR,1,current_procinfo.framepointer);
2416               end;
2417             list.concat(tai_regalloc.dealloc(current_procinfo.framepointer,nil));
2418           end;
2419 
2420         { return from interrupt }
2421         if po_interrupt in current_procinfo.procdef.procoptions then
2422           begin
2423             list.concat(Taicpu.Op_reg(A_POP,S_W,NR_ES));
2424             list.concat(Taicpu.Op_reg(A_POP,S_W,NR_DS));
2425             list.concat(Taicpu.Op_reg(A_POP,S_W,NR_DI));
2426             list.concat(Taicpu.Op_reg(A_POP,S_W,NR_SI));
2427             list.concat(Taicpu.Op_reg(A_POP,S_W,NR_DX));
2428             list.concat(Taicpu.Op_reg(A_POP,S_W,NR_CX));
2429             list.concat(Taicpu.Op_reg(A_POP,S_W,NR_BX));
2430             list.concat(Taicpu.Op_reg(A_POP,S_W,NR_AX));
2431             list.concat(Taicpu.Op_none(A_IRET,S_NO));
2432           end
2433         { Routines with the poclearstack flag set use only a ret }
2434         else if (current_procinfo.procdef.proccalloption in clearstack_pocalls) and
2435                 (not paramanager.use_fixed_stack)  then
2436          begin
2437            { complex return values are removed from stack in C code PM }
2438            { but not on win32 }
2439            { and not for safecall with hidden exceptions, because the result }
2440            { wich contains the exception is passed in EAX }
2441            if (target_info.system <> system_i386_win32) and
2442               not ((current_procinfo.procdef.proccalloption = pocall_safecall) and
2443                (tf_safecall_exceptions in target_info.flags)) and
2444               paramanager.ret_in_param(current_procinfo.procdef.returndef,
2445                                        current_procinfo.procdef) then
2446              list.concat(Taicpu.Op_const(ret_instr,S_W,sizeof(aint)))
2447            else
2448              list.concat(Taicpu.Op_none(ret_instr,S_NO));
2449          end
2450         { ... also routines with parasize=0 }
2451         else if (parasize=0) then
2452          list.concat(Taicpu.Op_none(ret_instr,S_NO))
2453         else
2454          begin
2455            { parameters are limited to 65535 bytes because ret allows only imm16 }
2456            if (parasize>65535) then
2457              CGMessage(cg_e_parasize_too_big);
2458            list.concat(Taicpu.Op_const(ret_instr,S_W,parasize));
2459          end;
2460       end;
2461 
2462 
2463     procedure tcg8086.g_copyvaluepara_openarray(list : TAsmList;const ref:treference;const lenloc:tlocation;elesize:tcgint;destreg:tregister);
2464       var
2465         power  : longint;
2466         opsize : topsize;
2467         saved_ds: Boolean;
2468       begin
2469         { get stack space }
2470         getcpuregister(list,NR_DI);
2471         a_load_loc_reg(list,OS_INT,lenloc,NR_DI);
2472         list.concat(Taicpu.op_reg(A_INC,S_W,NR_DI));
2473         { Now DI contains (high+1). }
2474 
2475         include(current_procinfo.flags, pi_has_open_array_parameter);
2476 
2477         { special case handling for elesize=2:
2478           set CX = (high+1) instead of CX = (high+1)*elesize.
2479 
2480           This allows us to avoid the SHR later. }
2481         if elesize=2 then
2482           begin
2483             { Now DI contains (high+1). Copy it to CX for later use. }
2484             getcpuregister(list,NR_CX);
2485             list.concat(Taicpu.op_reg_reg(A_MOV,S_W,NR_DI,NR_CX));
2486           end;
2487         { DI := DI * elesize }
2488         if (elesize<>1) then
2489          begin
2490            if ispowerof2(elesize, power) then
2491              a_op_const_reg(list,OP_SHL,OS_16,power,NR_DI)
2492            else
2493              a_op_const_reg(list,OP_IMUL,OS_16,elesize,NR_DI);
2494          end;
2495         if elesize<>2 then
2496           begin
2497             { Now DI contains (high+1)*elesize. Copy it to CX for later use. }
2498             getcpuregister(list,NR_CX);
2499             list.concat(Taicpu.op_reg_reg(A_MOV,S_W,NR_DI,NR_CX));
2500           end;
2501         { If we were probing pages, EDI=(size mod pagesize) and ESP is decremented
2502           by (size div pagesize)*pagesize, otherwise EDI=size.
2503           Either way, subtracting EDI from ESP will set ESP to desired final value. }
2504         list.concat(Taicpu.op_reg_reg(A_SUB,S_W,NR_DI,NR_SP));
2505         { align stack on 2 bytes }
2506         list.concat(Taicpu.op_const_reg(A_AND,S_W,aint($fffe),NR_SP));
2507         { load destination, don't use a_load_reg_reg, that will add a move instruction
2508           that can confuse the reg allocator }
2509         list.concat(Taicpu.Op_reg_reg(A_MOV,S_W,NR_SP,NR_DI));
2510 
2511 {$ifdef volatile_es}
2512         list.concat(taicpu.op_reg(A_PUSH,S_W,NR_SS));
2513         list.concat(taicpu.op_reg(A_POP,S_W,NR_ES));
2514 {$endif volatile_es}
2515 
2516         { Allocate SI and load it with source }
2517         getcpuregister(list,NR_SI);
2518         if ((ref.segment=NR_NO) and (segment_regs_equal(NR_SS,NR_DS) or (ref.base<>NR_BP))) or
2519            (is_segment_reg(ref.segment) and segment_regs_equal(ref.segment,NR_DS)) then
2520           begin
2521             hlcg.a_loadaddr_ref_reg(list,voidnearpointertype,voidnearpointertype,ref,NR_SI);
2522             saved_ds:=false;
2523           end
2524         else
2525           begin
2526             hlcg.a_loadaddr_ref_reg(list,voidnearpointertype,voidnearpointertype,ref,NR_SI);
2527             list.concat(taicpu.op_reg(A_PUSH,S_W,NR_DS));
2528             saved_ds:=true;
2529             if ref.segment<>NR_NO then
2530               list.concat(taicpu.op_reg(A_PUSH,S_W,ref.segment))
2531             else if ref.base=NR_BP then
2532               list.concat(taicpu.op_reg(A_PUSH,S_W,NR_SS))
2533             else
2534               internalerror(2014040403);
2535             list.concat(taicpu.op_reg(A_POP,S_W,NR_DS));
2536           end;
2537 
2538         { calculate size }
2539         opsize:=S_B;
2540         if elesize=2 then
2541           begin
2542             opsize:=S_W;
2543             { CX is already number of words, so no need to SHL/SHR }
2544           end
2545         else if (elesize and 1)=0 then
2546           begin
2547             opsize:=S_W;
2548             { CX is number of bytes, convert to words }
2549             list.concat(Taicpu.op_const_reg(A_SHR,S_W,1,NR_CX))
2550           end;
2551 
2552         if ts_cld in current_settings.targetswitches then
2553           list.concat(Taicpu.op_none(A_CLD,S_NO));
2554         if (opsize=S_B) and not (cs_opt_size in current_settings.optimizerswitches) then
2555           begin
2556             { SHR CX,1 moves the lowest (odd/even) bit to the carry flag }
2557             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2558             list.concat(Taicpu.op_const_reg(A_SHR,S_W,1,NR_CX));
2559             list.concat(Taicpu.op_none(A_REP,S_NO));
2560             list.concat(Taicpu.op_none(A_MOVSW,S_NO));
2561             { ADC CX,CX will set CX to 1 if the number of bytes was odd }
2562             list.concat(Taicpu.op_reg_reg(A_ADC,S_W,NR_CX,NR_CX));
2563             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2564             list.concat(Taicpu.op_none(A_REP,S_NO));
2565             list.concat(Taicpu.op_none(A_MOVSB,S_NO));
2566           end
2567         else
2568           begin
2569             list.concat(Taicpu.op_none(A_REP,S_NO));
2570             case opsize of
2571               S_B : list.concat(Taicpu.Op_none(A_MOVSB,S_NO));
2572               S_W : list.concat(Taicpu.Op_none(A_MOVSW,S_NO));
2573             end;
2574           end;
2575         ungetcpuregister(list,NR_DI);
2576         ungetcpuregister(list,NR_CX);
2577         ungetcpuregister(list,NR_SI);
2578         if saved_ds then
2579           list.concat(taicpu.op_reg(A_POP,S_W,NR_DS));
2580 
2581         { patch the new address, but don't use a_load_reg_reg, that will add a move instruction
2582           that can confuse the reg allocator }
2583         list.concat(Taicpu.Op_reg_reg(A_MOV,S_W,NR_SP,destreg));
2584         if current_settings.x86memorymodel in x86_far_data_models then
2585           list.concat(Taicpu.Op_reg_reg(A_MOV,S_W,NR_SS,GetNextReg(destreg)));
2586       end;
2587 
2588 
2589     procedure tcg8086.g_releasevaluepara_openarray(list : TAsmList;const l:tlocation);
2590       begin
2591         { Nothing to do }
2592       end;
2593 
2594 
2595     procedure tcg8086.get_32bit_ops(op: TOpCG; out op1, op2: TAsmOp);
2596       begin
2597         case op of
2598           OP_ADD :
2599             begin
2600               op1:=A_ADD;
2601               op2:=A_ADC;
2602             end;
2603           OP_SUB :
2604             begin
2605               op1:=A_SUB;
2606               op2:=A_SBB;
2607             end;
2608           OP_XOR :
2609             begin
2610               op1:=A_XOR;
2611               op2:=A_XOR;
2612             end;
2613           OP_OR :
2614             begin
2615               op1:=A_OR;
2616               op2:=A_OR;
2617             end;
2618           OP_AND :
2619             begin
2620               op1:=A_AND;
2621               op2:=A_AND;
2622             end;
2623           else
2624             internalerror(200203241);
2625         end;
2626       end;
2627 
2628 
2629     procedure tcg8086.add_move_instruction(instr: Taicpu);
2630       begin
2631         { HACK: when regvars are on, don't notify the register allocator of any
2632           direct moves to BX, so it doesn't try to coalesce them. Currently,
2633           direct moves to BX are only used when returning an int64 value in
2634           AX:BX:CX:DX. This hack fixes a common issue with functions, returning
2635           int64, for example:
2636 
2637         function RandomFrom(const AValues: array of Int64): Int64;
2638           begin
2639             result:=AValues[random(High(AValues)+1)];
2640           end;
2641 
2642     	push	bp
2643     	mov	bp,sp
2644 ; Var AValues located in register ireg20w
2645 ; Var $highAVALUES located in register ireg21w
2646 ; Var $result located in register ireg33w:ireg32w:ireg31w:ireg30w
2647     	mov	ireg20w,word [bp+6]
2648     	mov	ireg21w,word [bp+4]
2649 ; [3] result:=AValues[random(High(AValues)+1)];
2650     	mov	ireg22w,ireg21w
2651     	inc	ireg22w
2652     	mov	ax,ireg22w
2653     	cwd
2654     	mov	ireg23w,ax
2655     	mov	ireg24w,dx
2656     	push	ireg24w
2657     	push	ireg23w
2658     	call	SYSTEM_$$_RANDOM$LONGINT$$LONGINT
2659     	mov	ireg25w,ax
2660     	mov	ireg26w,dx
2661     	mov	ireg27w,ireg25w
2662     	mov	ireg28w,ireg27w
2663     	mov	ireg29w,ireg28w
2664     	mov	cl,3
2665     	shl	ireg29w,cl
2666 ; Var $result located in register ireg32w:ireg30w
2667     	mov	ireg30w,word [ireg20w+ireg29w]
2668     	mov	ireg31w,word [ireg20w+ireg29w+2]
2669     	mov	ireg32w,word [ireg20w+ireg29w+4]  ; problematic section start
2670     	mov	ireg33w,word [ireg20w+ireg29w+6]
2671 ; [4] end;
2672     	mov	bx,ireg32w  ; problematic section end
2673     	mov	ax,ireg33w
2674     	mov	dx,ireg30w
2675     	mov	cx,ireg31w
2676     	mov	sp,bp
2677     	pop	bp
2678     	ret	4
2679 
2680         the problem arises, because the register allocator tries to coalesce
2681           mov bx,ireg32w
2682         however, in the references [ireg20w+ireg29w+const], due to the
2683         constraints of i8086, ireg20w can only be BX (or BP, which isn't available
2684         to the register allocator, because it's used as a base pointer) }
2685 
2686         if (cs_opt_regvar in current_settings.optimizerswitches) and
2687            (instr.opcode=A_MOV) and (instr.ops=2) and
2688            (instr.oper[1]^.typ=top_reg) and (getsupreg(instr.oper[1]^.reg)=RS_BX) then
2689           exit
2690         else
2691           inherited add_move_instruction(instr);
2692       end;
2693 
2694 
2695     procedure tcg8086.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
2696       var
2697         hsym : tsym;
2698         href : treference;
2699         paraloc : Pcgparalocation;
2700         return_address_size: Integer;
2701       begin
2702         if current_settings.x86memorymodel in x86_far_code_models then
2703           return_address_size:=4
2704         else
2705           return_address_size:=2;
2706         { calculate the parameter info for the procdef }
2707         procdef.init_paraloc_info(callerside);
2708         hsym:=tsym(procdef.parast.Find('self'));
2709         if not(assigned(hsym) and
2710                (hsym.typ=paravarsym)) then
2711           internalerror(200305251);
2712         paraloc:=tparavarsym(hsym).paraloc[callerside].location;
2713         with paraloc^ do
2714           begin
2715             case loc of
2716               LOC_REGISTER:
2717                 a_op_const_reg(list,OP_SUB,size,ioffset,register);
2718               LOC_REFERENCE:
2719                 begin
2720                   { offset in the wrapper needs to be adjusted for the stored
2721                     return address }
2722                   if (reference.index<>NR_BP) and (reference.index<>NR_BX) and (reference.index<>NR_DI)
2723                     and (reference.index<>NR_SI) then
2724                     begin
2725                       list.concat(taicpu.op_reg(A_PUSH,S_W,NR_DI));
2726                       list.concat(taicpu.op_reg_reg(A_MOV,S_W,reference.index,NR_DI));
2727 
2728                       if reference.index=NR_SP then
2729                         reference_reset_base(href,NR_DI,reference.offset+return_address_size+2,ctempposinvalid,sizeof(pint),[])
2730                       else
2731                         reference_reset_base(href,NR_DI,reference.offset+return_address_size,ctempposinvalid,sizeof(pint),[]);
2732                       href.segment:=NR_SS;
2733                       a_op_const_ref(list,OP_SUB,size,ioffset,href);
2734                       list.concat(taicpu.op_reg(A_POP,S_W,NR_DI));
2735                     end
2736                   else
2737                     begin
2738                       reference_reset_base(href,reference.index,reference.offset+return_address_size,ctempposinvalid,sizeof(pint),[]);
2739                       href.segment:=NR_SS;
2740                       a_op_const_ref(list,OP_SUB,size,ioffset,href);
2741                     end;
2742                 end
2743               else
2744                 internalerror(200309189);
2745             end;
2746             paraloc:=next;
2747           end;
2748       end;
2749 
2750 
2751 { ************* 64bit operations ************ }
2752 
2753     procedure tcg64f8086.get_64bit_ops(op:TOpCG;var op1,op2:TAsmOp);
2754       begin
2755         case op of
2756           OP_ADD :
2757             begin
2758               op1:=A_ADD;
2759               op2:=A_ADC;
2760             end;
2761           OP_SUB :
2762             begin
2763               op1:=A_SUB;
2764               op2:=A_SBB;
2765             end;
2766           OP_XOR :
2767             begin
2768               op1:=A_XOR;
2769               op2:=A_XOR;
2770             end;
2771           OP_OR :
2772             begin
2773               op1:=A_OR;
2774               op2:=A_OR;
2775             end;
2776           OP_AND :
2777             begin
2778               op1:=A_AND;
2779               op2:=A_AND;
2780             end;
2781           else
2782             internalerror(200203241);
2783         end;
2784       end;
2785 
2786 
2787     procedure tcg64f8086.a_op64_ref_reg(list : TAsmList;op:TOpCG;size : tcgsize;const ref : treference;reg : tregister64);
2788       var
2789         op1,op2 : TAsmOp;
2790         tempref : treference;
2791       begin
2792         if not(op in [OP_NEG,OP_NOT]) then
2793           begin
2794             get_64bit_ops(op,op1,op2);
2795             tempref:=ref;
2796             tcgx86(cg).make_simple_ref(list,tempref);
2797             if op in [OP_ADD,OP_SUB] then
2798               cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2799             list.concat(taicpu.op_ref_reg(op1,S_W,tempref,reg.reglo));
2800             inc(tempref.offset,2);
2801             list.concat(taicpu.op_ref_reg(op2,S_W,tempref,cg.GetNextReg(reg.reglo)));
2802             inc(tempref.offset,2);
2803             list.concat(taicpu.op_ref_reg(op2,S_W,tempref,reg.reghi));
2804             inc(tempref.offset,2);
2805             list.concat(taicpu.op_ref_reg(op2,S_W,tempref,cg.GetNextReg(reg.reghi)));
2806             if op in [OP_ADD,OP_SUB] then
2807               cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2808           end
2809         else
2810           begin
2811             a_load64_ref_reg(list,ref,reg);
2812             a_op64_reg_reg(list,op,size,reg,reg);
2813           end;
2814       end;
2815 
2816 
2817     procedure tcg64f8086.a_op64_reg_ref(list : TAsmList;op:TOpCG;size : tcgsize;reg : tregister64; const ref: treference);
2818       var
2819         op1,op2 : TAsmOp;
2820         tempref : treference;
2821       begin
2822         case op of
2823           OP_NOT:
2824             begin
2825               tempref:=ref;
2826               tcgx86(cg).make_simple_ref(list,tempref);
2827               list.concat(taicpu.op_ref(A_NOT,S_W,tempref));
2828               inc(tempref.offset,2);
2829               list.concat(taicpu.op_ref(A_NOT,S_W,tempref));
2830               inc(tempref.offset,2);
2831               list.concat(taicpu.op_ref(A_NOT,S_W,tempref));
2832               inc(tempref.offset,2);
2833               list.concat(taicpu.op_ref(A_NOT,S_W,tempref));
2834             end;
2835           OP_NEG:
2836             begin
2837               tempref:=ref;
2838               tcgx86(cg).make_simple_ref(list,tempref);
2839               inc(tempref.offset,6);
2840               list.concat(taicpu.op_ref(A_NOT,S_W,tempref));
2841               dec(tempref.offset,2);
2842               list.concat(taicpu.op_ref(A_NOT,S_W,tempref));
2843               dec(tempref.offset,2);
2844               list.concat(taicpu.op_ref(A_NOT,S_W,tempref));
2845               dec(tempref.offset,2);
2846               cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2847               list.concat(taicpu.op_ref(A_NEG,S_W,tempref));
2848               inc(tempref.offset,2);
2849               list.concat(taicpu.op_const_ref(A_SBB,S_W,-1,tempref));
2850               inc(tempref.offset,2);
2851               list.concat(taicpu.op_const_ref(A_SBB,S_W,-1,tempref));
2852               inc(tempref.offset,2);
2853               list.concat(taicpu.op_const_ref(A_SBB,S_W,-1,tempref));
2854               cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2855             end;
2856           else
2857             begin
2858               get_64bit_ops(op,op1,op2);
2859               tempref:=ref;
2860               tcgx86(cg).make_simple_ref(list,tempref);
2861               if op in [OP_ADD,OP_SUB] then
2862                 cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2863               list.concat(taicpu.op_reg_ref(op1,S_W,reg.reglo,tempref));
2864               inc(tempref.offset,2);
2865               list.concat(taicpu.op_reg_ref(op2,S_W,cg.GetNextReg(reg.reglo),tempref));
2866               inc(tempref.offset,2);
2867               list.concat(taicpu.op_reg_ref(op2,S_W,reg.reghi,tempref));
2868               inc(tempref.offset,2);
2869               list.concat(taicpu.op_reg_ref(op2,S_W,cg.GetNextReg(reg.reghi),tempref));
2870               if op in [OP_ADD,OP_SUB] then
2871                 cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2872             end;
2873         end;
2874       end;
2875 
2876 
2877     procedure tcg64f8086.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
2878       var
2879         op1,op2 : TAsmOp;
2880         l2, l3: TAsmLabel;
2881         ai: taicpu;
2882       begin
2883         case op of
2884           OP_NEG :
2885             begin
2886               if (regsrc.reglo<>regdst.reglo) then
2887                 a_load64_reg_reg(list,regsrc,regdst);
2888               cg.a_op_reg_reg(list,OP_NOT,OS_32,regdst.reghi,regdst.reghi);
2889               list.concat(taicpu.op_reg(A_NOT,S_W,cg.GetNextReg(regdst.reglo)));
2890               cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2891               list.concat(taicpu.op_reg(A_NEG,S_W,regdst.reglo));
2892               list.concat(taicpu.op_const_reg(A_SBB,S_W,-1,cg.GetNextReg(regdst.reglo)));
2893               list.concat(taicpu.op_const_reg(A_SBB,S_W,-1,regdst.reghi));
2894               list.concat(taicpu.op_const_reg(A_SBB,S_W,-1,cg.GetNextReg(regdst.reghi)));
2895               cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2896               exit;
2897             end;
2898           OP_NOT :
2899             begin
2900               if (regsrc.reglo<>regdst.reglo) then
2901                 a_load64_reg_reg(list,regsrc,regdst);
2902               cg.a_op_reg_reg(list,OP_NOT,OS_32,regdst.reglo,regdst.reglo);
2903               cg.a_op_reg_reg(list,OP_NOT,OS_32,regdst.reghi,regdst.reghi);
2904               exit;
2905             end;
2906           OP_SHR,OP_SHL,OP_SAR:
2907             begin
2908               { load right operators in a register }
2909               cg.getcpuregister(list,NR_CX);
2910 
2911               cg.a_load_reg_reg(list,OS_16,OS_16,regsrc.reglo,NR_CX);
2912 
2913               current_asmdata.getjumplabel(l2);
2914               current_asmdata.getjumplabel(l3);
2915               cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2916               list.concat(taicpu.op_const_reg(A_AND,S_W,63,NR_CX));
2917               cg.a_jmp_flags(list,F_E,l3);
2918               cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2919               cg.a_label(list,l2);
2920               case op of
2921                 OP_SHL:
2922                   begin
2923                     cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2924                     list.concat(taicpu.op_const_reg(A_SHL,S_W,1,regdst.reglo));
2925                     list.concat(taicpu.op_const_reg(A_RCL,S_W,1,cg.GetNextReg(regdst.reglo)));
2926                     list.concat(taicpu.op_const_reg(A_RCL,S_W,1,regdst.reghi));
2927                     list.concat(taicpu.op_const_reg(A_RCL,S_W,1,cg.GetNextReg(regdst.reghi)));
2928                     cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2929                   end;
2930                 OP_SHR,OP_SAR:
2931                   begin
2932                     cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2933                     cg.a_op_const_reg(list,op,OS_16,1,cg.GetNextReg(regdst.reghi));
2934                     list.concat(taicpu.op_const_reg(A_RCR,S_W,1,regdst.reghi));
2935                     list.concat(taicpu.op_const_reg(A_RCR,S_W,1,cg.GetNextReg(regdst.reglo)));
2936                     list.concat(taicpu.op_const_reg(A_RCR,S_W,1,regdst.reglo));
2937                     cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2938                   end;
2939               end;
2940               ai:=Taicpu.Op_Sym(A_LOOP,S_W,l2);
2941               ai.is_jmp := True;
2942               list.Concat(ai);
2943               cg.a_label(list,l3);
2944 
2945               cg.ungetcpuregister(list,NR_CX);
2946               exit;
2947             end;
2948         end;
2949         get_64bit_ops(op,op1,op2);
2950         if op in [OP_ADD,OP_SUB] then
2951           cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2952         list.concat(taicpu.op_reg_reg(op1,S_W,regsrc.reglo,regdst.reglo));
2953         list.concat(taicpu.op_reg_reg(op2,S_W,cg.GetNextReg(regsrc.reglo),cg.GetNextReg(regdst.reglo)));
2954         list.concat(taicpu.op_reg_reg(op2,S_W,regsrc.reghi,regdst.reghi));
2955         list.concat(taicpu.op_reg_reg(op2,S_W,cg.GetNextReg(regsrc.reghi),cg.GetNextReg(regdst.reghi)));
2956         if op in [OP_ADD,OP_SUB] then
2957           cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2958       end;
2959 
2960 
2961     procedure tcg64f8086.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
2962       var
2963         op1,op2 : TAsmOp;
2964         loop_start: TAsmLabel;
2965         ai: taicpu;
2966       begin
2967         case op of
2968           OP_AND,OP_OR,OP_XOR:
2969             begin
2970               cg.a_op_const_reg(list,op,OS_32,tcgint(lo(value)),reg.reglo);
2971               cg.a_op_const_reg(list,op,OS_32,tcgint(hi(value)),reg.reghi);
2972             end;
2973           OP_ADD, OP_SUB:
2974             begin
2975               get_64bit_ops(op,op1,op2);
2976               if (value and $ffffffffffff) = 0 then
2977                 begin
2978                   { use a_op_const_reg to allow the use of inc/dec }
2979                   cg.a_op_const_reg(list,op,OS_16,aint((value shr 48) and $ffff),cg.GetNextReg(reg.reghi));
2980                 end
2981               // can't use a_op_const_ref because this may use dec/inc
2982               else if (value and $ffffffff) = 0 then
2983                 begin
2984                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2985                   list.concat(taicpu.op_const_reg(op1,S_W,aint((value shr 32) and $ffff),reg.reghi));
2986                   list.concat(taicpu.op_const_reg(op2,S_W,aint((value shr 48) and $ffff),cg.GetNextReg(reg.reghi)));
2987                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2988                 end
2989               else if (value and $ffff) = 0 then
2990                 begin
2991                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
2992                   list.concat(taicpu.op_const_reg(op1,S_W,aint((value shr 16) and $ffff),cg.GetNextReg(reg.reglo)));
2993                   list.concat(taicpu.op_const_reg(op2,S_W,aint((value shr 32) and $ffff),reg.reghi));
2994                   list.concat(taicpu.op_const_reg(op2,S_W,aint((value shr 48) and $ffff),cg.GetNextReg(reg.reghi)));
2995                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
2996                 end
2997               else
2998                 begin
2999                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3000                   list.concat(taicpu.op_const_reg(op1,S_W,aint(value and $ffff),reg.reglo));
3001                   list.concat(taicpu.op_const_reg(op2,S_W,aint((value shr 16) and $ffff),cg.GetNextReg(reg.reglo)));
3002                   list.concat(taicpu.op_const_reg(op2,S_W,aint((value shr 32) and $ffff),reg.reghi));
3003                   list.concat(taicpu.op_const_reg(op2,S_W,aint((value shr 48) and $ffff),cg.GetNextReg(reg.reghi)));
3004                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3005                 end;
3006             end;
3007           OP_SHR,OP_SHL,OP_SAR:
3008             begin
3009               value:=value and 63;
3010               case value of
3011                 0:
3012                   { ultra hyper fast shift by 0 };
3013                 1:
3014                   case op of
3015                     OP_SHL:
3016                       begin
3017                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3018                         list.concat(taicpu.op_const_reg(A_SHL,S_W,1,reg.reglo));
3019                         list.concat(taicpu.op_const_reg(A_RCL,S_W,1,cg.GetNextReg(reg.reglo)));
3020                         list.concat(taicpu.op_const_reg(A_RCL,S_W,1,reg.reghi));
3021                         list.concat(taicpu.op_const_reg(A_RCL,S_W,1,cg.GetNextReg(reg.reghi)));
3022                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3023                       end;
3024                     OP_SHR,OP_SAR:
3025                       begin
3026                         cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3027                         cg.a_op_const_reg(list,op,OS_16,1,cg.GetNextReg(reg.reghi));
3028                         list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reghi));
3029                         list.concat(taicpu.op_const_reg(A_RCR,S_W,1,cg.GetNextReg(reg.reglo)));
3030                         list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reglo));
3031                         cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3032                       end;
3033                   end;
3034                 2..15:
3035                   begin
3036                     cg.getcpuregister(list,NR_CX);
3037                     cg.a_load_const_reg(list,OS_16,value,NR_CX);
3038                     current_asmdata.getjumplabel(loop_start);
3039                     cg.a_label(list,loop_start);
3040                     case op of
3041                       OP_SHL:
3042                         begin
3043                           cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3044                           list.concat(taicpu.op_const_reg(A_SHL,S_W,1,reg.reglo));
3045                           list.concat(taicpu.op_const_reg(A_RCL,S_W,1,cg.GetNextReg(reg.reglo)));
3046                           list.concat(taicpu.op_const_reg(A_RCL,S_W,1,reg.reghi));
3047                           list.concat(taicpu.op_const_reg(A_RCL,S_W,1,cg.GetNextReg(reg.reghi)));
3048                           cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3049                         end;
3050                       OP_SHR,OP_SAR:
3051                         begin
3052                           cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3053                           cg.a_op_const_reg(list,op,OS_16,1,cg.GetNextReg(reg.reghi));
3054                           list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reghi));
3055                           list.concat(taicpu.op_const_reg(A_RCR,S_W,1,cg.GetNextReg(reg.reglo)));
3056                           list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reglo));
3057                           cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3058                         end;
3059                     end;
3060                     ai:=Taicpu.Op_Sym(A_LOOP,S_W,loop_start);
3061                     ai.is_jmp := True;
3062                     list.Concat(ai);
3063                     cg.ungetcpuregister(list,NR_CX);
3064                   end;
3065                 16,17:
3066                   begin
3067                     case op of
3068                       OP_SHL:
3069                         begin
3070                           cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,cg.GetNextReg(reg.reghi));
3071                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reglo),reg.reghi);
3072                           cg.a_load_reg_reg(list,OS_16,OS_16,reg.reglo,cg.GetNextReg(reg.reglo));
3073                           cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reglo,reg.reglo);
3074                         end;
3075                       OP_SHR:
3076                         begin
3077                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reglo),reg.reglo);
3078                           cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,cg.GetNextReg(reg.reglo));
3079                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reghi),reg.reghi);
3080                           cg.a_op_reg_reg(list,OP_XOR,OS_16,cg.GetNextReg(reg.reghi),cg.GetNextReg(reg.reghi));
3081                         end;
3082                       OP_SAR:
3083                         begin
3084                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reglo),reg.reglo);
3085                           cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,cg.GetNextReg(reg.reglo));
3086                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reghi),reg.reghi);
3087                           cg.a_op_const_reg(list,OP_SAR,OS_16,15,cg.GetNextReg(reg.reghi));
3088                         end;
3089                     end;
3090                     if value=17 then
3091                       case op of
3092                         OP_SHL:
3093                           begin
3094                             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3095                             list.concat(taicpu.op_const_reg(A_SHL,S_W,1,cg.GetNextReg(reg.reglo)));
3096                             list.concat(taicpu.op_const_reg(A_RCL,S_W,1,reg.reghi));
3097                             list.concat(taicpu.op_const_reg(A_RCL,S_W,1,cg.GetNextReg(reg.reghi)));
3098                             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3099                           end;
3100                         OP_SHR,OP_SAR:
3101                           begin
3102                             cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3103                             cg.a_op_const_reg(list,op,OS_16,1,reg.reghi);
3104                             list.concat(taicpu.op_const_reg(A_RCR,S_W,1,cg.GetNextReg(reg.reglo)));
3105                             list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reglo));
3106                             cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3107                           end;
3108                       end;
3109                   end;
3110                 18..31:
3111                   begin
3112                     case op of
3113                       OP_SHL:
3114                         begin
3115                           cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,cg.GetNextReg(reg.reghi));
3116                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reglo),reg.reghi);
3117                           cg.a_load_reg_reg(list,OS_16,OS_16,reg.reglo,cg.GetNextReg(reg.reglo));
3118                           cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reglo,reg.reglo);
3119                         end;
3120                       OP_SHR:
3121                         begin
3122                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reglo),reg.reglo);
3123                           cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,cg.GetNextReg(reg.reglo));
3124                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reghi),reg.reghi);
3125                           cg.a_op_reg_reg(list,OP_XOR,OS_16,cg.GetNextReg(reg.reghi),cg.GetNextReg(reg.reghi));
3126                         end;
3127                       OP_SAR:
3128                         begin
3129                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reglo),reg.reglo);
3130                           cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,cg.GetNextReg(reg.reglo));
3131                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reghi),reg.reghi);
3132                           cg.a_op_const_reg(list,OP_SAR,OS_16,15,cg.GetNextReg(reg.reghi));
3133                         end;
3134                     end;
3135                     cg.getcpuregister(list,NR_CX);
3136                     cg.a_load_const_reg(list,OS_16,value-16,NR_CX);
3137                     current_asmdata.getjumplabel(loop_start);
3138                     cg.a_label(list,loop_start);
3139                     case op of
3140                       OP_SHL:
3141                         begin
3142                           cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3143                           list.concat(taicpu.op_const_reg(A_SHL,S_W,1,cg.GetNextReg(reg.reglo)));
3144                           list.concat(taicpu.op_const_reg(A_RCL,S_W,1,reg.reghi));
3145                           list.concat(taicpu.op_const_reg(A_RCL,S_W,1,cg.GetNextReg(reg.reghi)));
3146                           cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3147                         end;
3148                       OP_SHR,OP_SAR:
3149                         begin
3150                           cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3151                           cg.a_op_const_reg(list,op,OS_16,1,reg.reghi);
3152                           list.concat(taicpu.op_const_reg(A_RCR,S_W,1,cg.GetNextReg(reg.reglo)));
3153                           list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reglo));
3154                           cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3155                         end;
3156                     end;
3157                     ai:=Taicpu.Op_Sym(A_LOOP,S_W,loop_start);
3158                     ai.is_jmp := True;
3159                     list.Concat(ai);
3160                     cg.ungetcpuregister(list,NR_CX);
3161                   end;
3162                 32..47:
3163                   case op of
3164                     OP_SHL:
3165                       begin
3166                         cg.a_op_const_reg_reg(list,OP_SHL,OS_32,value-32,reg.reglo,reg.reghi);
3167                         cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reglo,reg.reglo);
3168                         cg.a_op_reg_reg(list,OP_XOR,OS_16,cg.GetNextReg(reg.reglo),cg.GetNextReg(reg.reglo));
3169                       end;
3170                     OP_SHR:
3171                       begin
3172                         cg.a_op_const_reg_reg(list,OP_SHR,OS_32,value-32,reg.reghi,reg.reglo);
3173                         cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reghi,reg.reghi);
3174                         cg.a_op_reg_reg(list,OP_XOR,OS_16,cg.GetNextReg(reg.reghi),cg.GetNextReg(reg.reghi));
3175                       end;
3176                     OP_SAR:
3177                       begin
3178                         cg.a_op_const_reg_reg(list,OP_SAR,OS_32,value-32,reg.reghi,reg.reglo);
3179                         cg.a_op_const_reg_reg(list,OP_SAR,OS_16,15-(value-32),cg.GetNextReg(reg.reglo),reg.reghi);
3180                         cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,cg.GetNextReg(reg.reghi));
3181                       end;
3182                   end;
3183                 48..63:
3184                   case op of
3185                     OP_SHL:
3186                       begin
3187                         cg.a_load_reg_reg(list,OS_16,OS_16,reg.reglo,cg.GetNextReg(reg.reghi));
3188                         cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reglo,reg.reglo);
3189                         cg.a_op_reg_reg(list,OP_XOR,OS_16,cg.GetNextReg(reg.reglo),cg.GetNextReg(reg.reglo));
3190                         cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reghi,reg.reghi);
3191                         cg.a_op_const_reg(list,OP_SHL,OS_16,value-48,cg.GetNextReg(reg.reghi));
3192                       end;
3193                     OP_SHR:
3194                       begin
3195                         cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reghi),reg.reglo);
3196                         cg.a_op_reg_reg(list,OP_XOR,OS_16,cg.GetNextReg(reg.reghi),cg.GetNextReg(reg.reghi));
3197                         cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reghi,reg.reghi);
3198                         cg.a_op_reg_reg(list,OP_XOR,OS_16,cg.GetNextReg(reg.reglo),cg.GetNextReg(reg.reglo));
3199                         cg.a_op_const_reg(list,OP_SHR,OS_16,value-48,reg.reglo);
3200                       end;
3201                     OP_SAR:
3202                       if value=63 then
3203                         begin
3204                           cg.a_op_const_reg(list,OP_SAR,OS_16,15,cg.GetNextReg(reg.reghi));
3205                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reghi),reg.reghi);
3206                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reghi),cg.GetNextReg(reg.reglo));
3207                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reghi),reg.reglo);
3208                         end
3209                       else
3210                         begin
3211                           cg.a_op_const_reg_reg(list,OP_SAR,OS_16,value-48,cg.GetNextReg(reg.reghi),reg.reglo);
3212                           cg.a_op_const_reg_reg(list,OP_SAR,OS_16,15-(value-48),reg.reglo,cg.GetNextReg(reg.reglo));
3213                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reglo),reg.reghi);
3214                           cg.a_load_reg_reg(list,OS_16,OS_16,cg.GetNextReg(reg.reglo),cg.GetNextReg(reg.reghi));
3215                         end;
3216                   end;
3217               end;
3218             end;
3219           else
3220             internalerror(200204021);
3221         end;
3222       end;
3223 
3224 
3225     procedure tcg64f8086.a_op64_const_ref(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;const ref : treference);
3226       var
3227         op1,op2 : TAsmOp;
3228         tempref : treference;
3229       begin
3230         tempref:=ref;
3231         tcgx86(cg).make_simple_ref(list,tempref);
3232         case op of
3233           OP_AND,OP_OR,OP_XOR:
3234             begin
3235               cg.a_op_const_ref(list,op,OS_32,tcgint(lo(value)),tempref);
3236               inc(tempref.offset,4);
3237               cg.a_op_const_ref(list,op,OS_32,tcgint(hi(value)),tempref);
3238             end;
3239           OP_ADD, OP_SUB:
3240             begin
3241               get_64bit_ops(op,op1,op2);
3242               if (value and $ffffffffffff) = 0 then
3243                 begin
3244                   inc(tempref.offset,6);
3245                   { use a_op_const_ref to allow the use of inc/dec }
3246                   cg.a_op_const_ref(list,op,OS_16,aint((value shr 48) and $ffff),tempref);
3247                 end
3248               // can't use a_op_const_ref because this may use dec/inc
3249               else if (value and $ffffffff) = 0 then
3250                 begin
3251                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3252                   inc(tempref.offset,4);
3253                   list.concat(taicpu.op_const_ref(op1,S_W,aint((value shr 32) and $ffff),tempref));
3254                   inc(tempref.offset,2);
3255                   list.concat(taicpu.op_const_ref(op2,S_W,aint((value shr 48) and $ffff),tempref));
3256                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3257                 end
3258               else if (value and $ffff) = 0 then
3259                 begin
3260                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3261                   inc(tempref.offset,2);
3262                   list.concat(taicpu.op_const_ref(op1,S_W,aint((value shr 16) and $ffff),tempref));
3263                   inc(tempref.offset,2);
3264                   list.concat(taicpu.op_const_ref(op2,S_W,aint((value shr 32) and $ffff),tempref));
3265                   inc(tempref.offset,2);
3266                   list.concat(taicpu.op_const_ref(op2,S_W,aint((value shr 48) and $ffff),tempref));
3267                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3268                 end
3269               else
3270                 begin
3271                   cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
3272                   list.concat(taicpu.op_const_ref(op1,S_W,aint(value and $ffff),tempref));
3273                   inc(tempref.offset,2);
3274                   list.concat(taicpu.op_const_ref(op2,S_W,aint((value shr 16) and $ffff),tempref));
3275                   inc(tempref.offset,2);
3276                   list.concat(taicpu.op_const_ref(op2,S_W,aint((value shr 32) and $ffff),tempref));
3277                   inc(tempref.offset,2);
3278                   list.concat(taicpu.op_const_ref(op2,S_W,aint((value shr 48) and $ffff),tempref));
3279                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
3280                 end;
3281             end;
3282           else
3283             internalerror(200204022);
3284         end;
3285       end;
3286 
3287     procedure create_codegen;
3288       begin
3289         cg := tcg8086.create;
3290         cg64 := tcg64f8086.create;
3291       end;
3292 
3293 end.
3294