Lines Matching refs:compiler

22 static void orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update);
25 static void orc_compiler_sse_init (OrcCompiler *compiler);
27 static void orc_compiler_sse_assemble (OrcCompiler *compiler);
29 void sse_load_constant (OrcCompiler *compiler, int reg, int size, int value);
30 void sse_load_constant_long (OrcCompiler *compiler, int reg,
136 orc_compiler_sse_init (OrcCompiler *compiler) in orc_compiler_sse_init() argument
140 if (compiler->target_flags & ORC_TARGET_SSE_64BIT) { in orc_compiler_sse_init()
141 compiler->is_64bit = TRUE; in orc_compiler_sse_init()
143 if (compiler->target_flags & ORC_TARGET_SSE_FRAME_POINTER) { in orc_compiler_sse_init()
144 compiler->use_frame_pointer = TRUE; in orc_compiler_sse_init()
146 if (!(compiler->target_flags & ORC_TARGET_SSE_SHORT_JUMPS)) { in orc_compiler_sse_init()
147 compiler->long_jumps = TRUE; in orc_compiler_sse_init()
151 if (compiler->is_64bit) { in orc_compiler_sse_init()
153 compiler->valid_regs[i] = 1; in orc_compiler_sse_init()
155 compiler->valid_regs[X86_ESP] = 0; in orc_compiler_sse_init()
158 compiler->valid_regs[i] = 1; in orc_compiler_sse_init()
162 compiler->valid_regs[i] = 1; in orc_compiler_sse_init()
165 compiler->save_regs[X86_EBX] = 1; in orc_compiler_sse_init()
166 compiler->save_regs[X86_EBP] = 1; in orc_compiler_sse_init()
167 compiler->save_regs[X86_R12] = 1; in orc_compiler_sse_init()
168 compiler->save_regs[X86_R13] = 1; in orc_compiler_sse_init()
169 compiler->save_regs[X86_R14] = 1; in orc_compiler_sse_init()
170 compiler->save_regs[X86_R15] = 1; in orc_compiler_sse_init()
172 compiler->save_regs[X86_EDI] = 1; in orc_compiler_sse_init()
173 compiler->save_regs[X86_ESI] = 1; in orc_compiler_sse_init()
175 compiler->save_regs[i] = 1; in orc_compiler_sse_init()
180 compiler->valid_regs[i] = 1; in orc_compiler_sse_init()
182 compiler->valid_regs[X86_ESP] = 0; in orc_compiler_sse_init()
183 if (compiler->use_frame_pointer) { in orc_compiler_sse_init()
184 compiler->valid_regs[X86_EBP] = 0; in orc_compiler_sse_init()
187 compiler->valid_regs[i] = 1; in orc_compiler_sse_init()
189 compiler->save_regs[X86_EBX] = 1; in orc_compiler_sse_init()
190 compiler->save_regs[X86_EDI] = 1; in orc_compiler_sse_init()
191 compiler->save_regs[X86_EBP] = 1; in orc_compiler_sse_init()
194 compiler->alloc_regs[i] = 0; in orc_compiler_sse_init()
195 compiler->used_regs[i] = 0; in orc_compiler_sse_init()
198 if (compiler->is_64bit) { in orc_compiler_sse_init()
200 compiler->exec_reg = X86_ECX; in orc_compiler_sse_init()
201 compiler->gp_tmpreg = X86_EDX; in orc_compiler_sse_init()
203 compiler->exec_reg = X86_EDI; in orc_compiler_sse_init()
204 compiler->gp_tmpreg = X86_ECX; in orc_compiler_sse_init()
207 compiler->gp_tmpreg = X86_ECX; in orc_compiler_sse_init()
208 if (compiler->use_frame_pointer) { in orc_compiler_sse_init()
209 compiler->exec_reg = X86_EBX; in orc_compiler_sse_init()
211 compiler->exec_reg = X86_EBP; in orc_compiler_sse_init()
214 compiler->valid_regs[compiler->gp_tmpreg] = 0; in orc_compiler_sse_init()
215 compiler->valid_regs[compiler->exec_reg] = 0; in orc_compiler_sse_init()
217 switch (compiler->max_var_size) { in orc_compiler_sse_init()
219 compiler->loop_shift = 4; in orc_compiler_sse_init()
222 compiler->loop_shift = 3; in orc_compiler_sse_init()
225 compiler->loop_shift = 2; in orc_compiler_sse_init()
228 compiler->loop_shift = 1; in orc_compiler_sse_init()
231 ORC_ERROR("unhandled max var size %d", compiler->max_var_size); in orc_compiler_sse_init()
235 compiler->loop_shift--; in orc_compiler_sse_init()
241 if (compiler->n_insns <= 10) { in orc_compiler_sse_init()
242 compiler->unroll_shift = 1; in orc_compiler_sse_init()
244 if (!compiler->long_jumps) { in orc_compiler_sse_init()
245 compiler->unroll_shift = 0; in orc_compiler_sse_init()
247 if (compiler->loop_shift == 0) { in orc_compiler_sse_init()
249 compiler->unroll_shift = 0; in orc_compiler_sse_init()
251 compiler->alloc_loop_counter = TRUE; in orc_compiler_sse_init()
252 compiler->allow_gp_on_stack = TRUE; in orc_compiler_sse_init()
255 for(i=0;i<compiler->n_insns;i++){ in orc_compiler_sse_init()
256 OrcInstruction *insn = compiler->insns + i; in orc_compiler_sse_init()
263 compiler->vars[insn->src_args[0]].need_offset_reg = TRUE; in orc_compiler_sse_init()
270 sse_save_accumulators (OrcCompiler *compiler) in sse_save_accumulators() argument
277 OrcVariable *var = compiler->vars + i; in sse_save_accumulators()
283 tmp = orc_compiler_get_temp_reg (compiler); in sse_save_accumulators()
286 orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(3,2,3,2), src, tmp); in sse_save_accumulators()
288 orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(3,2,3,2), src, tmp); in sse_save_accumulators()
292 orc_sse_emit_paddw (compiler, tmp, src); in sse_save_accumulators()
294 orc_sse_emit_paddd (compiler, tmp, src); in sse_save_accumulators()
298 orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,1,1,1), src, tmp); in sse_save_accumulators()
301 orc_sse_emit_paddw (compiler, tmp, src); in sse_save_accumulators()
303 orc_sse_emit_paddd (compiler, tmp, src); in sse_save_accumulators()
309 orc_sse_emit_pshuflw (compiler, ORC_SSE_SHUF(1,1,1,1), src, tmp); in sse_save_accumulators()
311 orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(1,1,1,1), src, tmp); in sse_save_accumulators()
314 orc_sse_emit_paddw (compiler, tmp, src); in sse_save_accumulators()
318 orc_sse_emit_movd_store_register (compiler, src, compiler->gp_tmpreg); in sse_save_accumulators()
319 orc_x86_emit_and_imm_reg (compiler, 4, 0xffff, compiler->gp_tmpreg); in sse_save_accumulators()
320 orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg, in sse_save_accumulators()
322 compiler->exec_reg); in sse_save_accumulators()
324 orc_x86_emit_mov_sse_memoffset (compiler, 4, src, in sse_save_accumulators()
326 compiler->exec_reg, in sse_save_accumulators()
338 sse_load_constant (OrcCompiler *compiler, int reg, int size, int value) in sse_load_constant() argument
340 orc_sse_load_constant (compiler, reg, size, value); in sse_load_constant()
344 orc_sse_load_constant (OrcCompiler *compiler, int reg, int size, orc_uint64 value) in orc_sse_load_constant() argument
352 orc_x86_emit_mov_imm_reg (compiler, 4, value>>0, in orc_sse_load_constant()
353 compiler->gp_tmpreg); in orc_sse_load_constant()
354 orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg, in orc_sse_load_constant()
355 offset + 0, compiler->exec_reg); in orc_sse_load_constant()
357 orc_x86_emit_mov_imm_reg (compiler, 4, value>>32, in orc_sse_load_constant()
358 compiler->gp_tmpreg); in orc_sse_load_constant()
359 orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg, in orc_sse_load_constant()
360 offset + 4, compiler->exec_reg); in orc_sse_load_constant()
362 orc_x86_emit_mov_memoffset_sse (compiler, 8, offset, compiler->exec_reg, in orc_sse_load_constant()
365 orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,0,1,0), reg, reg); in orc_sse_load_constant()
380 ORC_ASM_CODE(compiler, "# loading constant %d 0x%08x\n", (int)value, (int)value); in orc_sse_load_constant()
382 orc_sse_emit_pxor(compiler, reg, reg); in orc_sse_load_constant()
386 orc_sse_emit_pcmpeqb (compiler, reg, reg); in orc_sse_load_constant()
389 if (compiler->target_flags & ORC_TARGET_SSE_SSSE3) { in orc_sse_load_constant()
391 orc_sse_emit_pcmpeqb (compiler, reg, reg); in orc_sse_load_constant()
392 orc_sse_emit_pabsb (compiler, reg, reg); in orc_sse_load_constant()
401 orc_sse_emit_pcmpeqb (compiler, reg, reg); in orc_sse_load_constant()
402 orc_sse_emit_pslld_imm (compiler, i, reg); in orc_sse_load_constant()
407 orc_sse_emit_pcmpeqb (compiler, reg, reg); in orc_sse_load_constant()
408 orc_sse_emit_psrld_imm (compiler, i, reg); in orc_sse_load_constant()
416 orc_sse_emit_pcmpeqb (compiler, reg, reg); in orc_sse_load_constant()
417 orc_sse_emit_psllw_imm (compiler, i, reg); in orc_sse_load_constant()
422 orc_sse_emit_pcmpeqb (compiler, reg, reg); in orc_sse_load_constant()
423 orc_sse_emit_psrlw_imm (compiler, i, reg); in orc_sse_load_constant()
428 orc_x86_emit_mov_imm_reg (compiler, 4, value, compiler->gp_tmpreg); in orc_sse_load_constant()
429 orc_sse_emit_movd_load_register (compiler, compiler->gp_tmpreg, reg); in orc_sse_load_constant()
431 orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(0,0,0,0), reg, reg); in orc_sse_load_constant()
433 orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(1,0,1,0), reg, reg); in orc_sse_load_constant()
438 sse_load_constant_long (OrcCompiler *compiler, int reg, in sse_load_constant_long() argument
446 ORC_ASM_CODE(compiler, "# loading constant %08x %08x %08x %08x\n", in sse_load_constant_long()
451 orc_x86_emit_mov_imm_reg (compiler, 4, constant->full_value[i], in sse_load_constant_long()
452 compiler->gp_tmpreg); in sse_load_constant_long()
453 orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg, in sse_load_constant_long()
454 offset + 4*i, compiler->exec_reg); in sse_load_constant_long()
456 orc_x86_emit_mov_memoffset_sse (compiler, 16, offset, compiler->exec_reg, in sse_load_constant_long()
462 sse_load_constants_outer (OrcCompiler *compiler) in sse_load_constants_outer() argument
466 if (compiler->vars[i].name == NULL) continue; in sse_load_constants_outer()
467 switch (compiler->vars[i].vartype) { in sse_load_constants_outer()
476 orc_sse_emit_pxor (compiler, in sse_load_constants_outer()
477 compiler->vars[i].alloc, compiler->vars[i].alloc); in sse_load_constants_outer()
482 orc_compiler_error(compiler,"bad vartype"); in sse_load_constants_outer()
487 orc_compiler_emit_invariants (compiler); in sse_load_constants_outer()
490 for(i=0;i<compiler->n_constants;i++){ in sse_load_constants_outer()
491 compiler->constants[i].alloc_reg = in sse_load_constants_outer()
492 orc_compiler_get_constant_reg (compiler); in sse_load_constants_outer()
495 for(i=0;i<compiler->n_constants;i++){ in sse_load_constants_outer()
496 if (compiler->constants[i].alloc_reg) { in sse_load_constants_outer()
497 if (compiler->constants[i].is_long) { in sse_load_constants_outer()
498 sse_load_constant_long (compiler, compiler->constants[i].alloc_reg, in sse_load_constants_outer()
499 compiler->constants + i); in sse_load_constants_outer()
501 sse_load_constant (compiler, compiler->constants[i].alloc_reg, in sse_load_constants_outer()
502 4, compiler->constants[i].value); in sse_load_constants_outer()
508 for(i=0;i<compiler->n_insns;i++){ in sse_load_constants_outer()
509 OrcInstruction *insn = compiler->insns + i; in sse_load_constants_outer()
516 if (compiler->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) { in sse_load_constants_outer()
517 orc_x86_emit_mov_memoffset_reg (compiler, 4, in sse_load_constants_outer()
519 compiler->exec_reg, in sse_load_constants_outer()
520 compiler->vars[insn->src_args[0]].ptr_offset); in sse_load_constants_outer()
522 orc_x86_emit_mov_imm_reg (compiler, 4, in sse_load_constants_outer()
523 compiler->vars[insn->src_args[1]].value.i, in sse_load_constants_outer()
524 compiler->vars[insn->src_args[0]].ptr_offset); in sse_load_constants_outer()
532 sse_load_constants_inner (OrcCompiler *compiler) in sse_load_constants_inner() argument
536 if (compiler->vars[i].name == NULL) continue; in sse_load_constants_inner()
537 switch (compiler->vars[i].vartype) { in sse_load_constants_inner()
544 if (compiler->vars[i].ptr_register) { in sse_load_constants_inner()
545 orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4, in sse_load_constants_inner()
546 (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]), compiler->exec_reg, in sse_load_constants_inner()
547 compiler->vars[i].ptr_register); in sse_load_constants_inner()
555 orc_compiler_error(compiler,"bad vartype"); in sse_load_constants_inner()
562 sse_add_strides (OrcCompiler *compiler) in sse_add_strides() argument
567 if (compiler->vars[i].name == NULL) continue; in sse_add_strides()
568 switch (compiler->vars[i].vartype) { in sse_add_strides()
575 orc_x86_emit_mov_memoffset_reg (compiler, 4, in sse_add_strides()
576 (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]), compiler->exec_reg, in sse_add_strides()
577 compiler->gp_tmpreg); in sse_add_strides()
578 orc_x86_emit_add_reg_memoffset (compiler, compiler->is_64bit ? 8 : 4, in sse_add_strides()
579 compiler->gp_tmpreg, in sse_add_strides()
580 (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]), compiler->exec_reg); in sse_add_strides()
582 if (compiler->vars[i].ptr_register == 0) { in sse_add_strides()
583 orc_compiler_error (compiler, "unimplemented: stride on pointer stored in memory"); in sse_add_strides()
591 orc_compiler_error(compiler,"bad vartype"); in sse_add_strides()
598 get_align_var (OrcCompiler *compiler) in get_align_var() argument
602 if (compiler->vars[i].size == 0) continue; in get_align_var()
603 if ((compiler->vars[i].size << compiler->loop_shift) >= 16) { in get_align_var()
608 if (compiler->vars[i].size == 0) continue; in get_align_var()
609 if ((compiler->vars[i].size << compiler->loop_shift) >= 8) { in get_align_var()
614 if (compiler->vars[i].size == 0) continue; in get_align_var()
618 orc_compiler_error(compiler, "could not find alignment variable"); in get_align_var()
643 orc_emit_split_3_regions (OrcCompiler *compiler) in orc_emit_split_3_regions() argument
649 align_var = get_align_var (compiler); in orc_emit_split_3_regions()
652 var_size_shift = get_shift (compiler->vars[align_var].size); in orc_emit_split_3_regions()
653 align_shift = var_size_shift + compiler->loop_shift; in orc_emit_split_3_regions()
656 orc_x86_emit_mov_imm_reg (compiler, 4, 16, X86_EAX); in orc_emit_split_3_regions()
657 orc_x86_emit_sub_memoffset_reg (compiler, 4, in orc_emit_split_3_regions()
659 compiler->exec_reg, X86_EAX); in orc_emit_split_3_regions()
660 orc_x86_emit_and_imm_reg (compiler, 4, (1<<align_shift) - 1, X86_EAX); in orc_emit_split_3_regions()
661 orc_x86_emit_sar_imm_reg (compiler, 4, var_size_shift, X86_EAX); in orc_emit_split_3_regions()
664 orc_x86_emit_cmp_reg_memoffset (compiler, 4, X86_EAX, in orc_emit_split_3_regions()
665 (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg); in orc_emit_split_3_regions()
667 orc_x86_emit_jle (compiler, 6); in orc_emit_split_3_regions()
670 orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, in orc_emit_split_3_regions()
671 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg); in orc_emit_split_3_regions()
674 orc_x86_emit_mov_memoffset_reg (compiler, 4, in orc_emit_split_3_regions()
675 (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg, in orc_emit_split_3_regions()
676 compiler->gp_tmpreg); in orc_emit_split_3_regions()
677 orc_x86_emit_sub_reg_reg (compiler, 4, X86_EAX, compiler->gp_tmpreg); in orc_emit_split_3_regions()
679 orc_x86_emit_mov_reg_reg (compiler, 4, compiler->gp_tmpreg, X86_EAX); in orc_emit_split_3_regions()
681 orc_x86_emit_sar_imm_reg (compiler, 4, in orc_emit_split_3_regions()
682 compiler->loop_shift + compiler->unroll_shift, in orc_emit_split_3_regions()
683 compiler->gp_tmpreg); in orc_emit_split_3_regions()
684 orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg, in orc_emit_split_3_regions()
685 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); in orc_emit_split_3_regions()
688 orc_x86_emit_and_imm_reg (compiler, 4, in orc_emit_split_3_regions()
689 (1<<(compiler->loop_shift + compiler->unroll_shift))-1, X86_EAX); in orc_emit_split_3_regions()
690 orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, in orc_emit_split_3_regions()
691 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); in orc_emit_split_3_regions()
693 orc_x86_emit_jmp (compiler, 7); in orc_emit_split_3_regions()
696 orc_x86_emit_label (compiler, 6); in orc_emit_split_3_regions()
698 orc_x86_emit_mov_memoffset_reg (compiler, 4, in orc_emit_split_3_regions()
699 (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg, X86_EAX); in orc_emit_split_3_regions()
700 orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, in orc_emit_split_3_regions()
701 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg); in orc_emit_split_3_regions()
702 orc_x86_emit_mov_imm_reg (compiler, 4, 0, X86_EAX); in orc_emit_split_3_regions()
703 orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, in orc_emit_split_3_regions()
704 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); in orc_emit_split_3_regions()
705 orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, in orc_emit_split_3_regions()
706 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); in orc_emit_split_3_regions()
708 orc_x86_emit_label (compiler, 7); in orc_emit_split_3_regions()
712 orc_emit_split_2_regions (OrcCompiler *compiler) in orc_emit_split_2_regions() argument
718 align_var = get_align_var (compiler); in orc_emit_split_2_regions()
721 var_size_shift = get_shift (compiler->vars[align_var].size); in orc_emit_split_2_regions()
722 align_shift = var_size_shift + compiler->loop_shift; in orc_emit_split_2_regions()
725 orc_x86_emit_mov_memoffset_reg (compiler, 4, in orc_emit_split_2_regions()
726 (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg, in orc_emit_split_2_regions()
727 compiler->gp_tmpreg); in orc_emit_split_2_regions()
728 orc_x86_emit_mov_reg_reg (compiler, 4, compiler->gp_tmpreg, X86_EAX); in orc_emit_split_2_regions()
729 orc_x86_emit_sar_imm_reg (compiler, 4, in orc_emit_split_2_regions()
730 compiler->loop_shift + compiler->unroll_shift, in orc_emit_split_2_regions()
731 compiler->gp_tmpreg); in orc_emit_split_2_regions()
732 orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg, in orc_emit_split_2_regions()
733 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); in orc_emit_split_2_regions()
736 orc_x86_emit_and_imm_reg (compiler, 4, in orc_emit_split_2_regions()
737 (1<<(compiler->loop_shift + compiler->unroll_shift))-1, X86_EAX); in orc_emit_split_2_regions()
738 orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, in orc_emit_split_2_regions()
739 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); in orc_emit_split_2_regions()
751 orc_compiler_sse_save_registers (OrcCompiler *compiler) in orc_compiler_sse_save_registers() argument
756 if (compiler->save_regs[X86_XMM0 + i] == 1) { in orc_compiler_sse_save_registers()
761 orc_x86_emit_mov_imm_reg (compiler, 4, 16 * saved, compiler->gp_tmpreg); in orc_compiler_sse_save_registers()
762 orc_x86_emit_sub_reg_reg (compiler, compiler->is_64bit ? 8 : 4, in orc_compiler_sse_save_registers()
763 compiler->gp_tmpreg, X86_ESP); in orc_compiler_sse_save_registers()
766 if (compiler->save_regs[X86_XMM0 + i] == 1) { in orc_compiler_sse_save_registers()
767 orc_x86_emit_mov_sse_memoffset (compiler, 16, X86_XMM0 + i, in orc_compiler_sse_save_registers()
776 orc_compiler_sse_restore_registers (OrcCompiler *compiler) in orc_compiler_sse_restore_registers() argument
781 if (compiler->save_regs[X86_XMM0 + i] == 1) { in orc_compiler_sse_restore_registers()
782 orc_x86_emit_mov_memoffset_sse (compiler, 16, saved * 16, X86_ESP, in orc_compiler_sse_restore_registers()
788 orc_x86_emit_mov_imm_reg (compiler, 4, 16 * saved, compiler->gp_tmpreg); in orc_compiler_sse_restore_registers()
789 orc_x86_emit_add_reg_reg (compiler, compiler->is_64bit ? 8 : 4, in orc_compiler_sse_restore_registers()
790 compiler->gp_tmpreg, X86_ESP); in orc_compiler_sse_restore_registers()
795 orc_compiler_sse_assemble (OrcCompiler *compiler) in orc_compiler_sse_assemble() argument
803 if (0 && orc_x86_assemble_copy_check (compiler)) { in orc_compiler_sse_assemble()
805 orc_x86_assemble_copy (compiler); in orc_compiler_sse_assemble()
809 align_var = get_align_var (compiler); in orc_compiler_sse_assemble()
811 orc_x86_assemble_copy (compiler); in orc_compiler_sse_assemble()
814 is_aligned = compiler->vars[align_var].is_aligned; in orc_compiler_sse_assemble()
817 orc_sse_emit_loop (compiler, 0, 0); in orc_compiler_sse_assemble()
819 compiler->codeptr = compiler->code; in orc_compiler_sse_assemble()
820 free (compiler->asm_code); in orc_compiler_sse_assemble()
821 compiler->asm_code = NULL; in orc_compiler_sse_assemble()
822 compiler->asm_code_len = 0; in orc_compiler_sse_assemble()
823 memset (compiler->labels, 0, sizeof (compiler->labels)); in orc_compiler_sse_assemble()
824 memset (compiler->labels_int, 0, sizeof (compiler->labels_int)); in orc_compiler_sse_assemble()
825 compiler->n_fixups = 0; in orc_compiler_sse_assemble()
826 compiler->n_output_insns = 0; in orc_compiler_sse_assemble()
829 if (compiler->error) return; in orc_compiler_sse_assemble()
831 orc_x86_emit_prologue (compiler); in orc_compiler_sse_assemble()
833 orc_compiler_sse_save_registers (compiler); in orc_compiler_sse_assemble()
836 if (orc_program_has_float (compiler)) { in orc_compiler_sse_assemble()
838 orc_sse_set_mxcsr (compiler); in orc_compiler_sse_assemble()
842 sse_load_constants_outer (compiler); in orc_compiler_sse_assemble()
844 if (compiler->program->is_2d) { in orc_compiler_sse_assemble()
845 if (compiler->program->constant_m > 0) { in orc_compiler_sse_assemble()
846 orc_x86_emit_mov_imm_reg (compiler, 4, compiler->program->constant_m, in orc_compiler_sse_assemble()
848 orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, in orc_compiler_sse_assemble()
850 compiler->exec_reg); in orc_compiler_sse_assemble()
852 orc_x86_emit_mov_memoffset_reg (compiler, 4, in orc_compiler_sse_assemble()
854 compiler->exec_reg, X86_EAX); in orc_compiler_sse_assemble()
855 orc_x86_emit_test_reg_reg (compiler, 4, X86_EAX, X86_EAX); in orc_compiler_sse_assemble()
856 orc_x86_emit_jle (compiler, LABEL_OUTER_LOOP_SKIP); in orc_compiler_sse_assemble()
857 orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, in orc_compiler_sse_assemble()
859 compiler->exec_reg); in orc_compiler_sse_assemble()
862 orc_x86_emit_label (compiler, LABEL_OUTER_LOOP); in orc_compiler_sse_assemble()
865 if (compiler->program->constant_n > 0 && in orc_compiler_sse_assemble()
866 compiler->program->constant_n <= ORC_SSE_ALIGNED_DEST_CUTOFF) { in orc_compiler_sse_assemble()
868 } else if (compiler->loop_shift > 0) { in orc_compiler_sse_assemble()
869 if (compiler->has_iterator_opcode || is_aligned) { in orc_compiler_sse_assemble()
870 orc_emit_split_2_regions (compiler); in orc_compiler_sse_assemble()
873 orc_emit_split_3_regions (compiler); in orc_compiler_sse_assemble()
877 orc_x86_emit_mov_memoffset_reg (compiler, 4, in orc_compiler_sse_assemble()
878 (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg, in orc_compiler_sse_assemble()
879 compiler->gp_tmpreg); in orc_compiler_sse_assemble()
880 orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg, in orc_compiler_sse_assemble()
881 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); in orc_compiler_sse_assemble()
884 sse_load_constants_inner (compiler); in orc_compiler_sse_assemble()
886 if (compiler->program->constant_n > 0 && in orc_compiler_sse_assemble()
887 compiler->program->constant_n <= ORC_SSE_ALIGNED_DEST_CUTOFF) { in orc_compiler_sse_assemble()
888 int n_left = compiler->program->constant_n; in orc_compiler_sse_assemble()
892 compiler->offset = 0; in orc_compiler_sse_assemble()
894 save_loop_shift = compiler->loop_shift; in orc_compiler_sse_assemble()
895 while (n_left >= (1<<compiler->loop_shift)) { in orc_compiler_sse_assemble()
896 ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); in orc_compiler_sse_assemble()
897 orc_sse_emit_loop (compiler, compiler->offset, 0); in orc_compiler_sse_assemble()
899 n_left -= 1<<compiler->loop_shift; in orc_compiler_sse_assemble()
900 compiler->offset += 1<<compiler->loop_shift; in orc_compiler_sse_assemble()
902 for(loop_shift = compiler->loop_shift-1; loop_shift>=0; loop_shift--) { in orc_compiler_sse_assemble()
904 compiler->loop_shift = loop_shift; in orc_compiler_sse_assemble()
905 ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", loop_shift); in orc_compiler_sse_assemble()
906 orc_sse_emit_loop (compiler, compiler->offset, 0); in orc_compiler_sse_assemble()
908 compiler->offset += 1<<loop_shift; in orc_compiler_sse_assemble()
911 compiler->loop_shift = save_loop_shift; in orc_compiler_sse_assemble()
918 if (compiler->has_iterator_opcode || is_aligned) { in orc_compiler_sse_assemble()
921 if (compiler->loop_shift == 0) { in orc_compiler_sse_assemble()
930 save_loop_shift = compiler->loop_shift; in orc_compiler_sse_assemble()
931 compiler->vars[align_var].is_aligned = FALSE; in orc_compiler_sse_assemble()
934 compiler->loop_shift = l; in orc_compiler_sse_assemble()
935 ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); in orc_compiler_sse_assemble()
937 orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift, in orc_compiler_sse_assemble()
938 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg); in orc_compiler_sse_assemble()
939 orc_x86_emit_je (compiler, LABEL_STEP_UP(compiler->loop_shift)); in orc_compiler_sse_assemble()
940 orc_sse_emit_loop (compiler, 0, 1<<compiler->loop_shift); in orc_compiler_sse_assemble()
941 orc_x86_emit_label (compiler, LABEL_STEP_UP(compiler->loop_shift)); in orc_compiler_sse_assemble()
944 compiler->loop_shift = save_loop_shift; in orc_compiler_sse_assemble()
945 compiler->vars[align_var].is_aligned = TRUE; in orc_compiler_sse_assemble()
948 orc_x86_emit_label (compiler, LABEL_REGION1_SKIP); in orc_compiler_sse_assemble()
950 orc_x86_emit_cmp_imm_memoffset (compiler, 4, 0, in orc_compiler_sse_assemble()
951 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); in orc_compiler_sse_assemble()
952 orc_x86_emit_je (compiler, LABEL_REGION2_SKIP); in orc_compiler_sse_assemble()
954 if (compiler->loop_counter != ORC_REG_INVALID) { in orc_compiler_sse_assemble()
955 orc_x86_emit_mov_memoffset_reg (compiler, 4, in orc_compiler_sse_assemble()
956 (int)ORC_STRUCT_OFFSET(OrcExecutor, counter2), compiler->exec_reg, in orc_compiler_sse_assemble()
957 compiler->loop_counter); in orc_compiler_sse_assemble()
960 ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); in orc_compiler_sse_assemble()
961 orc_x86_emit_align (compiler, 4); in orc_compiler_sse_assemble()
962 orc_x86_emit_label (compiler, LABEL_INNER_LOOP_START); in orc_compiler_sse_assemble()
963 ui_max = 1<<compiler->unroll_shift; in orc_compiler_sse_assemble()
965 compiler->offset = ui<<compiler->loop_shift; in orc_compiler_sse_assemble()
966 orc_sse_emit_loop (compiler, compiler->offset, in orc_compiler_sse_assemble()
967 (ui==ui_max-1) << (compiler->loop_shift + compiler->unroll_shift)); in orc_compiler_sse_assemble()
969 compiler->offset = 0; in orc_compiler_sse_assemble()
970 if (compiler->loop_counter != ORC_REG_INVALID) { in orc_compiler_sse_assemble()
971 orc_x86_emit_add_imm_reg (compiler, 4, -1, compiler->loop_counter, TRUE); in orc_compiler_sse_assemble()
973 orc_x86_emit_dec_memoffset (compiler, 4, in orc_compiler_sse_assemble()
975 compiler->exec_reg); in orc_compiler_sse_assemble()
977 orc_x86_emit_jne (compiler, LABEL_INNER_LOOP_START); in orc_compiler_sse_assemble()
978 orc_x86_emit_label (compiler, LABEL_REGION2_SKIP); in orc_compiler_sse_assemble()
984 save_loop_shift = compiler->loop_shift + compiler->unroll_shift; in orc_compiler_sse_assemble()
985 compiler->vars[align_var].is_aligned = FALSE; in orc_compiler_sse_assemble()
988 compiler->loop_shift = l; in orc_compiler_sse_assemble()
989 ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); in orc_compiler_sse_assemble()
991 orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift, in orc_compiler_sse_assemble()
992 (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); in orc_compiler_sse_assemble()
993 orc_x86_emit_je (compiler, LABEL_STEP_DOWN(compiler->loop_shift)); in orc_compiler_sse_assemble()
994 orc_sse_emit_loop (compiler, 0, 1<<compiler->loop_shift); in orc_compiler_sse_assemble()
995 orc_x86_emit_label (compiler, LABEL_STEP_DOWN(compiler->loop_shift)); in orc_compiler_sse_assemble()
998 compiler->loop_shift = save_loop_shift; in orc_compiler_sse_assemble()
1002 if (compiler->program->is_2d && compiler->program->constant_m != 1) { in orc_compiler_sse_assemble()
1003 sse_add_strides (compiler); in orc_compiler_sse_assemble()
1005 orc_x86_emit_add_imm_memoffset (compiler, 4, -1, in orc_compiler_sse_assemble()
1007 compiler->exec_reg); in orc_compiler_sse_assemble()
1008 orc_x86_emit_jne (compiler, LABEL_OUTER_LOOP); in orc_compiler_sse_assemble()
1009 orc_x86_emit_label (compiler, LABEL_OUTER_LOOP_SKIP); in orc_compiler_sse_assemble()
1012 sse_save_accumulators (compiler); in orc_compiler_sse_assemble()
1016 orc_sse_restore_mxcsr (compiler); in orc_compiler_sse_assemble()
1019 orc_x86_emit_emms (compiler); in orc_compiler_sse_assemble()
1022 orc_compiler_sse_restore_registers (compiler); in orc_compiler_sse_assemble()
1024 orc_x86_emit_epilogue (compiler); in orc_compiler_sse_assemble()
1026 orc_x86_calculate_offsets (compiler); in orc_compiler_sse_assemble()
1027 orc_x86_output_insns (compiler); in orc_compiler_sse_assemble()
1029 orc_x86_do_fixups (compiler); in orc_compiler_sse_assemble()
1033 orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update) in orc_sse_emit_loop() argument
1041 for(j=0;j<compiler->n_insns;j++){ in orc_sse_emit_loop()
1042 insn = compiler->insns + j; in orc_sse_emit_loop()
1045 compiler->insn_index = j; in orc_sse_emit_loop()
1049 ORC_ASM_CODE(compiler,"# %d: %s\n", j, insn->opcode->name); in orc_sse_emit_loop()
1051 compiler->min_temp_reg = ORC_VEC_REG_BASE; in orc_sse_emit_loop()
1053 compiler->insn_shift = compiler->loop_shift; in orc_sse_emit_loop()
1055 compiler->insn_shift += 1; in orc_sse_emit_loop()
1058 compiler->insn_shift += 2; in orc_sse_emit_loop()
1064 compiler->vars[insn->dest_args[0]].alloc != in orc_sse_emit_loop()
1065 compiler->vars[insn->src_args[0]].alloc) { in orc_sse_emit_loop()
1067 orc_sse_emit_movq (compiler, in orc_sse_emit_loop()
1068 compiler->vars[insn->src_args[0]].alloc, in orc_sse_emit_loop()
1069 compiler->vars[insn->dest_args[0]].alloc); in orc_sse_emit_loop()
1071 orc_sse_emit_movdqu (compiler, in orc_sse_emit_loop()
1072 compiler->vars[insn->src_args[0]].alloc, in orc_sse_emit_loop()
1073 compiler->vars[insn->dest_args[0]].alloc); in orc_sse_emit_loop()
1076 rule->emit (compiler, rule->emit_user, insn); in orc_sse_emit_loop()
1078 orc_compiler_error (compiler, "no code generation rule for %s", in orc_sse_emit_loop()
1085 OrcVariable *var = compiler->vars + k; in orc_sse_emit_loop()
1100 if (compiler->vars[k].ptr_register) { in orc_sse_emit_loop()
1101 orc_x86_emit_add_imm_reg (compiler, compiler->is_64bit ? 8 : 4, in orc_sse_emit_loop()
1103 compiler->vars[k].ptr_register, FALSE); in orc_sse_emit_loop()
1105 orc_x86_emit_add_imm_memoffset (compiler, compiler->is_64bit ? 8 : 4, in orc_sse_emit_loop()
1108 compiler->exec_reg); in orc_sse_emit_loop()