1;  z_Windows_NT-586_asm.asm:  - microtasking routines specifically
2;    written for IA-32 architecture and Intel(R) 64 running Windows* OS
3
4;
5;//===----------------------------------------------------------------------===//
6;//
7;//                     The LLVM Compiler Infrastructure
8;//
9;// This file is dual licensed under the MIT and the University of Illinois Open
10;// Source Licenses. See LICENSE.txt for details.
11;//
12;//===----------------------------------------------------------------------===//
13;
14
15        TITLE   z_Windows_NT-586_asm.asm
16
17; ============================= IA-32 architecture ==========================
18ifdef _M_IA32
19
; Enable the Pentium (586) instruction set, including privileged instructions.
20        .586P
21
; @Version is the assembler's built-in version number.  Assemblers newer than
; 5.10 take the simplified .model directive; older ones get the segments
; declared by hand in the else-branch below.
22if @Version gt 510
23        .model HUGE
24else
; 32-bit (USE32) code, data, constant and uninitialised-data segments.
25_TEXT   SEGMENT PARA USE32 PUBLIC 'CODE'
26_TEXT   ENDS
27_DATA   SEGMENT DWORD USE32 PUBLIC 'DATA'
28_DATA   ENDS
29CONST   SEGMENT DWORD USE32 PUBLIC 'CONST'
30CONST   ENDS
31_BSS    SEGMENT DWORD USE32 PUBLIC 'BSS'
32_BSS    ENDS
; Segments with debug classes 'DEBSYM' / 'DEBTYP' (empty in this file).
33$$SYMBOLS       SEGMENT BYTE USE32 'DEBSYM'
34$$SYMBOLS       ENDS
35$$TYPES SEGMENT BYTE USE32 'DEBTYP'
36$$TYPES ENDS
; Segment with class 'TLS' (empty in this file).
37_TLS    SEGMENT DWORD USE32 PUBLIC 'TLS'
38_TLS    ENDS
; Group the data segments under the name FLAT and tell the assembler that
; CS, DS and SS all address that group.
39FLAT    GROUP _DATA, CONST, _BSS
40        ASSUME  CS: FLAT, DS: FLAT, SS: FLAT
41endif
42
43
;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_pause
;
; void
; __kmp_x86_pause( void );
;
; Executes a single PAUSE instruction and returns.  Takes no arguments,
; so no stack-offset equates are defined for it; it reads and writes no
; general-purpose registers and no memory.
PUBLIC  ___kmp_x86_pause
_TEXT   SEGMENT
        ALIGN 16
___kmp_x86_pause PROC NEAR

        db      0f3H, 090H      ;; pause, emitted as raw bytes (F3 90)
        ret

___kmp_x86_pause ENDP
_TEXT   ENDS
62
;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Executes CPUID with EAX = mode and ECX = mode2, then stores the
; resulting EAX, EBX, ECX and EDX as four consecutive dwords at p.
; EDI, EBX, ECX and EDX are saved on entry and restored before return.
PUBLIC  ___kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16
; incoming arguments, relative to EBP
_mode$  = 8
_mode2$ = 12
_p$     = 16
; dword slots of the result buffer, relative to p
_eax$   = 0
_ebx$   = 4
_ecx$   = 8
_edx$   = 12

___kmp_x86_cpuid PROC NEAR

        push    ebp
        mov     ebp, esp
        push    edi
        push    ebx
        push    ecx
        push    edx

        mov     ecx, DWORD PTR [ebp + _mode2$]  ; sub-leaf selector
        mov     eax, DWORD PTR [ebp + _mode$]   ; leaf selector
        cpuid

        mov     edi, DWORD PTR [ebp + _p$]      ; edi <= result buffer
        mov     DWORD PTR [edi + _eax$], eax
        mov     DWORD PTR [edi + _ebx$], ebx
        mov     DWORD PTR [edi + _ecx$], ecx
        mov     DWORD PTR [edi + _edx$], edx

        pop     edx
        pop     ecx
        pop     ebx
        pop     edi
        leave                                   ; mov esp, ebp / pop ebp
        ret

___kmp_x86_cpuid ENDP
_TEXT     ENDS
110
;------------------------------------------------------------------------
; FUNCTION ___kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p (LOCK XADD) and returns the value *p held
; before the addition in EAX.
PUBLIC  ___kmp_test_then_add32
_p$ = 4
_d$ = 8
_TEXT   SEGMENT
        ALIGN 16
___kmp_test_then_add32 PROC NEAR

        mov     ecx, DWORD PTR [esp + _p$]
        mov     eax, DWORD PTR [esp + _d$]
        lock xadd DWORD PTR [ecx], eax  ; eax <= old *p, *p <= old *p + d
        ret

___kmp_test_then_add32 ENDP
_TEXT   ENDS
130
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 in EAX when the store happened, 0 otherwise.
PUBLIC  ___kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store8 PROC NEAR

        mov     dl, BYTE PTR [esp + _sv$]       ; value to store
        mov     al, BYTE PTR [esp + _cv$]       ; value expected in *p
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg BYTE PTR [ecx], dl         ; ZF = 1 when the swap happened
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 result
        ret

___kmp_compare_and_store8 ENDP
_TEXT     ENDS
155
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 in EAX when the store happened, 0 otherwise.
PUBLIC  ___kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store16 PROC NEAR

        mov     dx, WORD PTR [esp + _sv$]       ; value to store
        mov     ax, WORD PTR [esp + _cv$]       ; value expected in *p
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg WORD PTR [ecx], dx         ; ZF = 1 when the swap happened
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 result
        ret

___kmp_compare_and_store16 ENDP
_TEXT     ENDS
180
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 in EAX when the store happened, 0 otherwise.
PUBLIC  ___kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store32 PROC NEAR

        mov     edx, DWORD PTR [esp + _sv$]     ; value to store
        mov     eax, DWORD PTR [esp + _cv$]     ; value expected in *p
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg DWORD PTR [ecx], edx       ; ZF = 1 when the swap happened
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 result
        ret

___kmp_compare_and_store32 ENDP
_TEXT     ENDS
205
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap built on LOCK CMPXCHG8B: if *p == cv,
; stores sv into *p.  Returns 1 in EAX when the store happened, else 0.
; EBX and EDI are saved and restored around the operation.
PUBLIC  ___kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx
        push    edi

        mov     edi, DWORD PTR [ebp + _p$]
        mov     ebx, DWORD PTR [ebp + _sv_low$]         ; ecx:ebx <= sv
        mov     ecx, DWORD PTR [ebp + _sv_high$]
        mov     eax, DWORD PTR [ebp + _cv_low$]         ; edx:eax <= cv
        mov     edx, DWORD PTR [ebp + _cv_high$]
        lock cmpxchg8b QWORD PTR [edi]                  ; ZF = 1 when the swap happened
        sete    al
        movzx   eax, al                                 ; zero-extend the 0/1 result

        pop     edi
        pop     ebx
        leave                                           ; mov esp, ebp / pop ebp
        ret

___kmp_compare_and_store64 ENDP
_TEXT     ENDS
242
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically stores d into *p and returns the previous byte in AL.
PUBLIC  ___kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed8 PROC NEAR

        mov     al, BYTE PTR [esp + _d$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock xchg BYTE PTR [ecx], al    ; al <= old *p, *p <= d
        ret

___kmp_xchg_fixed8 ENDP
_TEXT     ENDS
263
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically stores d into *p and returns the previous 16-bit value in AX.
PUBLIC  ___kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed16 PROC NEAR

        mov     ax, WORD PTR [esp + _d$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock xchg WORD PTR [ecx], ax    ; ax <= old *p, *p <= d
        ret

___kmp_xchg_fixed16 ENDP
_TEXT     ENDS
284
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically stores d into *p and returns the previous 32-bit value in EAX.
PUBLIC  ___kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed32 PROC NEAR

        mov     eax, DWORD PTR [esp + _d$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock xchg DWORD PTR [ecx], eax  ; eax <= old *p, *p <= d
        ret

___kmp_xchg_fixed32 ENDP
_TEXT     ENDS
305
306
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically stores d into *p and returns the value that *p held at the
; moment of the exchange.  The result is returned in ST(0).
;
; The old value is taken from the register produced by LOCK XCHG, so it
; is exactly the value that was replaced.  Exactly one value is pushed
; onto the x87 register stack (the return value); nothing else is left
; behind on it.
PUBLIC  ___kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_d$ = 12
_old_value$ = -4

___kmp_xchg_real32 PROC NEAR

        push    ebp
        mov     ebp, esp
        sub     esp, 4                          ; stack slot for old_value

        mov     ecx, DWORD PTR _p$[ebp]
        mov     eax, DWORD PTR _d$[ebp]         ; raw bits of d

lock    xchg    DWORD PTR [ecx], eax            ; eax <= raw bits previously in *p

        mov     DWORD PTR _old_value$[ebp], eax
        fld     DWORD PTR _old_value$[ebp]
                        ;; return old_value in ST(0)

        mov     esp, ebp
        pop     ebp
        ret

___kmp_xchg_real32 ENDP
_TEXT   ENDS
345
346
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv, stores sv into *p.
; Returns in AL the value *p held when the comparison was made
; (equal to cv exactly when the store happened).
PUBLIC  ___kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret8 PROC NEAR

        mov     dl, BYTE PTR [esp + _sv$]       ; value to store
        mov     al, BYTE PTR [esp + _cv$]       ; value expected in *p
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg BYTE PTR [ecx], dl         ; on mismatch AL <= current *p
        ret

___kmp_compare_and_store_ret8 ENDP
_TEXT     ENDS
369
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns in AX the value *p held when the comparison was made
; (equal to cv exactly when the store happened).
PUBLIC  ___kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret16 PROC NEAR

        mov     dx, WORD PTR [esp + _sv$]       ; value to store
        mov     ax, WORD PTR [esp + _cv$]       ; value expected in *p
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg WORD PTR [ecx], dx         ; on mismatch AX <= current *p
        ret

___kmp_compare_and_store_ret16 ENDP
_TEXT     ENDS
392
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns in EAX the value *p held when the comparison was made
; (equal to cv exactly when the store happened).
PUBLIC  ___kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret32 PROC NEAR

        mov     edx, DWORD PTR [esp + _sv$]     ; value to store
        mov     eax, DWORD PTR [esp + _cv$]     ; value expected in *p
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg DWORD PTR [ecx], edx       ; on mismatch EAX <= current *p
        ret

___kmp_compare_and_store_ret32 ENDP
_TEXT     ENDS
415
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap built on LOCK CMPXCHG8B: if *p == cv,
; stores sv into *p.  Returns in EDX:EAX the value *p held when the
; comparison was made.  EBX and EDI are saved and restored.
PUBLIC  ___kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store_ret64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx
        push    edi

        mov     edi, DWORD PTR [ebp + _p$]
        mov     ebx, DWORD PTR [ebp + _sv_low$]         ; ecx:ebx <= sv
        mov     ecx, DWORD PTR [ebp + _sv_high$]
        mov     eax, DWORD PTR [ebp + _cv_low$]         ; edx:eax <= cv
        mov     edx, DWORD PTR [ebp + _cv_high$]
        lock cmpxchg8b QWORD PTR [edi]                  ; on mismatch EDX:EAX <= current *p

        pop     edi
        pop     ebx
        leave                                           ; mov esp, ebp / pop ebp
        ret

___kmp_compare_and_store_ret64 ENDP
_TEXT     ENDS
450
;------------------------------------------------------------------------
; FUNCTION ___kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 FPU control word from the 16-bit value at p (FLDCW).
;
; parameters:
;       p:      [esp + 4]
PUBLIC  ___kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_load_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR [esp + _p$]
        fldcw   WORD PTR [eax]          ; control word <= *p
        ret

___kmp_load_x87_fpu_control_word ENDP
_TEXT     ENDS
472
;------------------------------------------------------------------------
; FUNCTION ___kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Writes the current x87 FPU control word to the 16-bit location at p
; (FSTCW).
;
; parameters:
;       p:      [esp + 4]
PUBLIC  ___kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_store_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR [esp + _p$]
        fstcw   WORD PTR [eax]          ; *p <= control word
        ret

___kmp_store_x87_fpu_control_word ENDP
_TEXT     ENDS
494
;------------------------------------------------------------------------
; FUNCTION ___kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word();
;
; Executes FNCLEX, the no-wait form of "clear x87 exception flags",
; and returns.  Takes no arguments.
PUBLIC  ___kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16

___kmp_clear_x87_fpu_status_word PROC NEAR

        fnclex                          ; clear x87 exception flags
        ret

___kmp_clear_x87_fpu_status_word ENDP
_TEXT     ENDS
511
512
513;------------------------------------------------------------------------
514; FUNCTION ___kmp_invoke_microtask
515;
516; typedef void  (*microtask_t)( int *gtid, int *tid, ... );
517;
518; int
519; __kmp_invoke_microtask( microtask_t pkfn,
520;                         int gtid, int tid,
521;                         int argc, void *p_argv[] )
;
; Calls (*pkfn)( &gtid, &tid, p_argv[0], ..., p_argv[argc-1] ) with every
; argument passed on the stack, then returns 1 in EAX.
;
; Before the arguments are pushed, ESP is lowered by a padding amount chosen
; so that ESP is a multiple of 128 at the CALL instruction (i.e. after the
; argc + 2 argument pushes).  The total number of bytes taken from the stack
; (padding + arguments) is kept in the local _stk_adj$ and added back to ESP
; after the call.
; NOTE(review): this assumes pkfn leaves its arguments on the stack for the
; caller to remove -- confirm against the microtask calling convention.
;
; When OMPT_SUPPORT is non-zero a sixth argument is read at 28[ebp]; it is
; treated as a pointer and the current frame pointer (EBP) is stored through it.
522PUBLIC  ___kmp_invoke_microtask
523_TEXT   SEGMENT
524        ALIGN 16
; incoming arguments, relative to EBP
525_pkfn$ = 8
526_gtid$ = 12
527_tid$ = 16
528_argc$ = 20
529_argv$ = 24
530if OMPT_SUPPORT
531_exit_frame$ = 28
532endif
; locals in the 16 bytes reserved below EBP
533_i$ = -8
534_stk_adj$ = -16
535_vptr$ = -12
536_qptr$ = -4
537
538___kmp_invoke_microtask PROC NEAR
539; Prologue: set up the EBP frame, reserve 16 bytes of locals, save EBX/ESI/EDI.
540        push    ebp
541        mov     ebp, esp
542        sub     esp, 16                                 ; 00000010H
543        push    ebx
544        push    esi
545        push    edi
546if OMPT_SUPPORT
547        mov     eax, DWORD PTR _exit_frame$[ebp]
548        mov     DWORD PTR [eax], ebp                    ; *exit_frame <= this function's EBP
549endif
550; i = argc (loop counter for the argument pushes below; EAX still holds argc)
551        mov     eax, DWORD PTR _argc$[ebp]
552        mov     DWORD PTR _i$[ebp], eax
553
554;; ------------------------------------------------------------
;; Compute the alignment padding.  edx = (argc + 2) * 4 is the number of
;; bytes the pushes below will consume.
555	lea     edx, DWORD PTR [eax*4+8]
556	mov     ecx, esp                                ; Save current SP into ECX
557	mov	eax,edx		; Save the size of the args in eax
558	sub	ecx,edx		; esp-((#args+2)*4) -> ecx -- without mods, stack ptr would be this
559	mov	edx,ecx		; Save to edx
560	and	ecx,-128	; Round that address down to a multiple of 128 (clear low 7 bits)
561	sub	edx,ecx		; Padding = unaligned address - aligned address (0..127)
562	sub	esp,edx		; Prepare stack ptr-- Now it will be aligned on 128-byte boundary at the call
563
564	add	edx,eax		; Total stack decrement = padding + argument bytes
565        mov     DWORD PTR _stk_adj$[ebp], edx           ; remembered for the cleanup after the call
566;; ------------------------------------------------------------
567
; Push p_argv[argc-1] down to p_argv[0].  The loop is entered at its
; test; nothing is pushed when argc <= 0.
568        jmp     SHORT $L22237
569$L22238:
; --i
570        mov     ecx, DWORD PTR _i$[ebp]
571        sub     ecx, 1
572        mov     DWORD PTR _i$[ebp], ecx
573$L22237:
574        cmp     DWORD PTR _i$[ebp], 0
575        jle     SHORT $L22239
576; vptr = p_argv[i - 1]
577        mov     edx, DWORD PTR _i$[ebp]
578        mov     eax, DWORD PTR _argv$[ebp]
579        mov     ecx, DWORD PTR [eax+edx*4-4]
580        mov     DWORD PTR _vptr$[ebp], ecx
581; reload it into EAX ...
582        mov     eax, DWORD PTR _vptr$[ebp]
583; ... and push it as an argument for pkfn
584        push    eax
585; next (lower) index
586        jmp     SHORT $L22238
587$L22239:
588; vptr = &tid   (address of this function's own tid argument slot)
589        lea     edx, DWORD PTR _tid$[ebp]
590        mov     DWORD PTR _vptr$[ebp], edx
591; qptr = &gtid  (address of this function's own gtid argument slot)
592        lea     eax, DWORD PTR _gtid$[ebp]
593        mov     DWORD PTR _qptr$[ebp], eax
594; push &tid (second argument of pkfn)
595        mov     eax, DWORD PTR _vptr$[ebp]
596;
597        push    eax
598; push &gtid (first argument of pkfn)
599        mov     eax, DWORD PTR _qptr$[ebp]
600;
601        push    eax
602; indirect call through the pkfn argument
603        call    DWORD PTR _pkfn$[ebp]
604; drop the arguments and the alignment padding
605        add     esp, DWORD PTR _stk_adj$[ebp]
606; return value is always 1
607        mov     eax, 1
608; Epilogue: restore the saved registers and the caller's frame.
609        pop     edi
610        pop     esi
611        pop     ebx
612        mov     esp, ebp
613        pop     ebp
614        ret     0
615___kmp_invoke_microtask ENDP
616_TEXT   ENDS
617
618endif
619
620; ==================================== Intel(R) 64 ===================================
621
622ifdef _M_AMD64
623
;------------------------------------------------------------------------
; FUNCTION __kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Executes CPUID with EAX = mode and ECX = mode2, then stores the
; resulting EAX, EBX, ECX and EDX as four consecutive dwords at p.
;
; parameters:
;	mode:		ecx
;	mode2:		edx
;	cpuid_buffer: 	r8
;
; RBX (callee-saved, overwritten by CPUID) is the only register that has
; to be preserved.  The function never moves RSP after the prologue, so
; no frame pointer is set up and the epilogue is the plain
; "pop / ret" form that the Win64 unwinder recognises.
PUBLIC  __kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16

__kmp_x86_cpuid PROC FRAME ;NEAR

        push      rbx				; callee-save register
        .pushreg  rbx
        .ENDPROLOG

        mov       r10, r8			; p parameter (r8 is free to reuse, r10 kept for clarity)
        mov       eax, ecx			; mode parameter
        mov       ecx, edx			; mode2 parameter
        cpuid					; Query the CPUID for the current processor

        mov       DWORD PTR 0[ r10 ], eax	; store results into buffer
        mov       DWORD PTR 4[ r10 ], ebx
        mov       DWORD PTR 8[ r10 ], ecx
        mov       DWORD PTR 12[ r10 ], edx

        pop       rbx				; callee-save register
        ret

__kmp_x86_cpuid ENDP
_TEXT     ENDS
665
666
;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p (LOCK XADD) and returns the value *p held
; before the addition.
;
; parameters:
;	p:	rcx
;	d:	edx
;
; return: 	eax
PUBLIC  __kmp_test_then_add32
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add32 PROC ;NEAR

        lock xadd DWORD PTR [rcx], edx  ; edx <= old *p, *p <= old *p + d
        mov     eax, edx                ; return the old value
        ret

__kmp_test_then_add32 ENDP
_TEXT   ENDS
689
690
;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add64
;
; kmp_int64
; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically adds d to *p (LOCK XADD) and returns the 64-bit value *p
; held before the addition.
;
; parameters:
;	p:	rcx
;	d:	rdx
;
; return: 	rax
PUBLIC  __kmp_test_then_add64
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add64 PROC ;NEAR

        lock xadd QWORD PTR [rcx], rdx  ; rdx <= old *p, *p <= old *p + d
        mov     rax, rdx                ; return the old value
        ret

__kmp_test_then_add64 ENDP
_TEXT   ENDS
713
714
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	eax
PUBLIC  __kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store8 PROC ;NEAR

        mov     al, dl                          ; al <= "cv"
        lock cmpxchg BYTE PTR [rcx], r8b        ; store "sv" if *p == al; ZF = 1 on success
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 result
        ret

__kmp_compare_and_store8 ENDP
_TEXT     ENDS
741
742
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	eax
PUBLIC  __kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store16 PROC ;NEAR

        mov     ax, dx                          ; ax <= "cv"
        lock cmpxchg WORD PTR [rcx], r8w        ; store "sv" if *p == ax; ZF = 1 on success
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 result
        ret

__kmp_compare_and_store16 ENDP
_TEXT     ENDS
769
770
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	eax
PUBLIC  __kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store32 PROC ;NEAR

        mov     eax, edx                        ; eax <= "cv"
        lock cmpxchg DWORD PTR [rcx], r8d       ; store "sv" if *p == eax; ZF = 1 on success
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 result
        ret

__kmp_compare_and_store32 ENDP
_TEXT     ENDS
797
798
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;	p:	rcx
;	cv:	rdx
;	sv:	r8
;
; return:	eax
PUBLIC  __kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store64 PROC ;NEAR

        mov     rax, rdx                        ; rax <= "cv"
        lock cmpxchg QWORD PTR [rcx], r8        ; store "sv" if *p == rax; ZF = 1 on success
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 result
        ret

__kmp_compare_and_store64 ENDP
_TEXT     ENDS
825
826
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically stores d into *p and returns the previous byte.
;
; parameters:
;	p:	rcx
;	d:	dl
;
; return: 	al
PUBLIC  __kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed8 PROC ;NEAR

        lock xchg BYTE PTR [rcx], dl    ; dl <= old *p, *p <= d
        mov     al, dl                  ; return the old value
        ret

__kmp_xchg_fixed8 ENDP
_TEXT     ENDS
850
851
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically stores d into *p and returns the previous 16-bit value.
;
; parameters:
;	p:	rcx
;	d:	dx
;
; return: 	ax
PUBLIC  __kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed16 PROC ;NEAR

        lock xchg WORD PTR [rcx], dx    ; dx <= old *p, *p <= d
        mov     ax, dx                  ; return the old value
        ret

__kmp_xchg_fixed16 ENDP
_TEXT     ENDS
875
876
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically stores d into *p and returns the previous 32-bit value.
;
; parameters:
;	p:	rcx
;	d:	edx
;
; return: 	eax
PUBLIC  __kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed32 PROC ;NEAR

        lock xchg DWORD PTR [rcx], edx  ; edx <= old *p, *p <= d
        mov     eax, edx                ; return the old value
        ret

__kmp_xchg_fixed32 ENDP
_TEXT   ENDS
899
900
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed64
;
; kmp_int64
; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically stores d into *p and returns the previous 64-bit value.
;
; parameters:
;	p:	rcx
;	d:	rdx
;
; return: 	rax
PUBLIC  __kmp_xchg_fixed64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed64 PROC ;NEAR

        lock xchg QWORD PTR [rcx], rdx  ; rdx <= old *p, *p <= d
        mov     rax, rdx                ; return the old value
        ret

__kmp_xchg_fixed64 ENDP
_TEXT   ENDS
923
924
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv, stores sv into *p.
; Returns the value *p held when the comparison was made (equal to cv
; exactly when the store happened).
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	al
PUBLIC  __kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret8 PROC ;NEAR

        mov     al, dl                          ; al <= "cv"
        lock cmpxchg BYTE PTR [rcx], r8b
                        ; If AL == [rcx]: ZF is set and "sv" is stored.
                        ; Otherwise: ZF is cleared and AL <= [rcx].
        ret

__kmp_compare_and_store_ret8 ENDP
_TEXT     ENDS
951
952
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns the value *p held when the comparison was made (equal to cv
; exactly when the store happened).
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	ax
PUBLIC  __kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret16 PROC ;NEAR

        mov     ax, dx                          ; ax <= "cv"
        lock cmpxchg WORD PTR [rcx], r8w        ; on mismatch AX <= current *p
        ret

__kmp_compare_and_store_ret16 ENDP
_TEXT     ENDS
977
978
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns the value *p held when the comparison was made (equal to cv
; exactly when the store happened).
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	eax
PUBLIC  __kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret32 PROC ;NEAR

        mov     eax, edx                        ; eax <= "cv"
        lock cmpxchg DWORD PTR [rcx], r8d       ; on mismatch EAX <= current *p
        ret

__kmp_compare_and_store_ret32 ENDP
_TEXT     ENDS
1003
1004
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns the value *p held when the comparison was made (equal to cv
; exactly when the store happened).
;
; parameters:
;	p:	rcx
;	cv:	rdx
;	sv:	r8
;
; return:	rax
PUBLIC  __kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret64 PROC ;NEAR

        mov     rax, rdx                        ; rax <= "cv"
        lock cmpxchg QWORD PTR [rcx], r8        ; on mismatch RAX <= current *p
        ret

__kmp_compare_and_store_ret64 ENDP
_TEXT     ENDS
1029
1030
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_loop8
;
; kmp_int8
; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Spins until a byte compare-and-swap of *p from cv to sv succeeds,
; executing PAUSE between attempts.  On return AL holds cv, the value
; that was replaced.
;
; DL keeps "cv" and R8B keeps "sv" for the whole loop, so every retry
; compares against the caller's cv.  (CMPXCHG overwrites AL on failure,
; which is why AL is reloaded from DL at the top of each iteration.)
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	al
PUBLIC  __kmp_compare_and_store_loop8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_loop8 PROC ;NEAR
$__kmp_loop:
        mov       al, dl	; "cv" (DL is never modified)
lock    cmpxchg   BYTE PTR [rcx], r8b
                        ; Compare AL with [rcx].  If equal set
                        ; ZF and store R8B ("sv") into [rcx].  Else, clear
                        ; ZF and load [rcx] into AL.
        jz     	SHORT $__kmp_success

        db      0f3H
        db      090H    		; pause

	jmp	SHORT $__kmp_loop

$__kmp_success:
        ret

__kmp_compare_and_store_loop8 ENDP
_TEXT     ENDS
1066
1067
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically stores the 32-bit pattern of d into *p and returns the
; pattern that was there before.  The bits are moved through a
; general-purpose register; no floating-point arithmetic is performed.
;
; parameters:
;	p:	rcx
;       d:	xmm1 (lower 4 bytes)
;
; return:	xmm0 (lower 4 bytes)
PUBLIC  __kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real32 PROC ;NEAR

        movd    edx, xmm1               ; edx <= bits of d
        lock xchg DWORD PTR [rcx], edx  ; edx <= old bits, *p <= d
        movd    xmm0, edx               ; return the old value
        ret

__kmp_xchg_real32 ENDP
_TEXT   ENDS
1093
1094
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real64
;
; kmp_real64
; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d );
;
; Atomically stores the 64-bit pattern of d into *p and returns the
; pattern that was there before.  The bits are moved through a
; general-purpose register; no floating-point arithmetic is performed.
;
; parameters:
;	p:	rcx
;	d:	xmm1 (lower 8 bytes)
;
; return:	xmm0 (lower 8 bytes)
PUBLIC  __kmp_xchg_real64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real64 PROC ;NEAR

        movd    rdx, xmm1               ; rdx <= bits of "d"
        lock xchg QWORD PTR [rcx], rdx  ; rdx <= old bits, *p <= d
        movd    xmm0, rdx               ; return the old value
        ret

__kmp_xchg_real64 ENDP
_TEXT   ENDS
1120
;------------------------------------------------------------------------
; FUNCTION __kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 FPU control word from the 16-bit value at p (FLDCW).
;
; parameters:
;	p:	rcx
PUBLIC  __kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_load_x87_fpu_control_word PROC ;NEAR

        fldcw   WORD PTR [rcx]          ; control word <= *p
        ret

__kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS
1139
1140
;------------------------------------------------------------------------
; FUNCTION __kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Writes the current x87 FPU control word to the 16-bit location at p
; (FSTCW).
;
; parameters:
;	p:	rcx
PUBLIC  __kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_store_x87_fpu_control_word PROC ;NEAR

        fstcw   WORD PTR [rcx]          ; *p <= control word
        ret

__kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS
1159
1160
;------------------------------------------------------------------------
; FUNCTION __kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word()
;
; Executes FNCLEX, the no-wait form of "clear x87 exception flags",
; and returns.  Takes no arguments.
PUBLIC  __kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_clear_x87_fpu_status_word PROC ;NEAR

        fnclex                          ; clear x87 exception flags
        ret

__kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS
1176
1177
1178;------------------------------------------------------------------------
1179; FUNCTION __kmp_invoke_microtask
1180;
1181; typedef void  (*microtask_t)( int *gtid, int *tid, ... );
1182;
1183; int
1184; __kmp_invoke_microtask( microtask_t pkfn,
1185;                         int gtid, int tid,
1186;                         int argc, void *p_argv[] ) {
1187;
1188;     (*pkfn) ( &gtid, &tid, argv[0], ... );
1189;     return 1;
1190; }
1191;
1192; note:
1193;      just before call to pkfn must have rsp 128-byte aligned for compiler
1194;
1195; parameters:
1196;      rcx:   pkfn	16[rbp]
1197;      edx:   gtid	24[rbp]
1198;      r8d:   tid	32[rbp]
1199;      r9d:   argc	40[rbp]
1200;      [st]:  p_argv	48[rbp]
;
; Only gtid and tid are actually written to their home slots (24[rbp] and
; 32[rbp]), because their addresses are passed on to pkfn.  pkfn and argc
; stay in registers.  With OMPT_SUPPORT non-zero, a sixth argument is read
; from 56[rbp]; it is used as a pointer through which RBP is stored.
;
; NOTE(review): argc is declared int (r9d) but the code below reads the full
; 64-bit r9 (mov rax, r9 / shl rax, 3).  This relies on the upper half of r9
; being zero on entry -- confirm the callers guarantee that.
1201;
1202; reg temps:
1203;      rax:   used all over the place
1204;      rdx:   used all over the place
1205;      rcx:   used as argument counter for push parms loop
1206;      r10:   used to hold pkfn function pointer argument
;      r8:    used as a temporary by the push loop before it receives p_argv[0]
1207;
1208; return:      eax    (always 1/TRUE)
1209$_pkfn   = 16
1210$_gtid   = 24
1211$_tid    = 32
1212$_argc   = 40
1213$_p_argv = 48
1214if OMPT_SUPPORT
1215$_exit_frame = 56
1216endif
1217
1218PUBLIC  __kmp_invoke_microtask
1219_TEXT   SEGMENT
1220        ALIGN 16
1221
1222__kmp_invoke_microtask PROC FRAME ;NEAR
; On entry 8[rsp]..32[rsp] are the caller-provided home slots; 16[rsp] and
; 24[rsp] become 24[rbp] ($_gtid) and 32[rbp] ($_tid) after "push rbp".
1223	mov	QWORD PTR 16[rsp], rdx	; home gtid parameter
1224	mov 	QWORD PTR 24[rsp], r8	; home tid parameter
1225        push    rbp		; save base pointer
1226        .pushreg rbp
1227	sub	rsp, 0		; no fixed allocation necessary - end prolog
1228
1229        lea     rbp, QWORD PTR [rsp]   	; establish the base pointer
1230        .setframe rbp, 0
1231        .ENDPROLOG
1232if OMPT_SUPPORT
1233        mov     rax, QWORD PTR $_exit_frame[rbp]
1234        mov     QWORD PTR [rax], rbp	; *exit_frame <= this function's frame pointer
1235endif
1236	mov	r10, rcx	; save pkfn pointer for later
1237
1238;; ------------------------------------------------------------
;; Choose rsp so that, after the (argc-2) pushes and the 32-byte home area
;; allocated below, rsp is a multiple of 128 at the call instruction.
1239        mov     rax, r9		; rax <= argc
1240        cmp     rax, 2
1241        jge     SHORT $_kmp_invoke_stack_align
1242        mov     rax, 2          ; set 4 homes if less than 2 parms
1243$_kmp_invoke_stack_align:
1244	lea     rdx, QWORD PTR [rax*8+16] ; rdx <= (max(argc,2) + 2) * 8 = pushed parms + 32-byte home area
1245	mov     rax, rsp        ; Save current SP into rax
1246	sub	rax, rdx	; rsp - ((argc+2)*8) -> rax
1247				; without align, rsp would be this
1248	and     rax, -128       ; Mask off 7 bits (128-byte align)
1249	add     rax, rdx        ; add space for push's in a loop below
1250	mov     rsp, rax        ; Prepare the stack ptr
1251				; Now it will align to 128-byte at the call
1252;; ------------------------------------------------------------
1253        			; setup pkfn parameter stack
1254	mov	rax, r9		; rax <= argc
1255	shl	rax, 3		; rax <= argc*8
1256	mov	rdx, QWORD PTR $_p_argv[rbp]	; rdx <= p_argv
1257	add	rdx, rax	; rdx <= &p_argv[argc]
1258	mov	rcx, r9		; rcx <= argc
1259	jecxz	SHORT $_kmp_invoke_pass_parms	; nothing to push if argc=0
1260	cmp	ecx, 1		; if argc=1 branch ahead
1261	je	SHORT $_kmp_invoke_one_parm
1262	sub	ecx, 2		; ecx <= argc-2 = number of parms to push; if zero (argc=2) branch ahead
1263	je	SHORT $_kmp_invoke_two_parms
1264
1265$_kmp_invoke_push_parms:	; push last - 5th parms to pkfn on stack
1266	sub	rdx, 8		; decrement p_argv pointer to previous parm
1267	mov 	r8, QWORD PTR [rdx] ; r8 <= p_argv[rcx-1]
1268	push	r8		; push p_argv[rcx-1] onto stack (reverse order)
1269	sub	ecx, 1
1270	jecxz	SHORT $_kmp_invoke_two_parms	; all stack parms pushed
1271	jmp	SHORT $_kmp_invoke_push_parms
1272
1273$_kmp_invoke_two_parms:
1274	sub	rdx, 8		; put 4th parm to pkfn in r9
1275	mov	r9, QWORD PTR [rdx] ; r9 <= p_argv[1]
1276
1277$_kmp_invoke_one_parm:
1278        sub	rdx, 8		; put 3rd parm to pkfn in r8
1279	mov	r8, QWORD PTR [rdx] ; r8 <= p_argv[0]
1280
1281$_kmp_invoke_pass_parms:	; put 1st & 2nd parms to pkfn in registers
1282	lea	rdx, QWORD PTR $_tid[rbp]  ; rdx <= &tid (2nd parm to pkfn)
1283	lea	rcx, QWORD PTR $_gtid[rbp] ; rcx <= &gtid (1st parm to pkfn)
1284        sub     rsp, 32         ; add stack space for first four parms
1285	mov	rax, r10	; rax <= pkfn
1286	call	rax		; call (*pkfn)()
1287	mov	rax, 1		; move 1 into return register;
1288
; "lea rsp, [rbp]" discards the pushed parms, the home area and the
; alignment padding in one step.
1289        lea     rsp, QWORD PTR [rbp]	; restore stack pointer
1290
1291;	add	rsp, 0		; no fixed allocation necessary - start epilog
1292        pop     rbp		; restore frame pointer
1293        ret
1294__kmp_invoke_microtask ENDP
1295_TEXT   ENDS
1296
1297endif
1298
1299END
1300