; z_Windows_NT-586_asm.asm:  - microtasking routines specifically
;   written for IA-32 architecture and Intel(R) 64 running Windows* OS

;
;//===----------------------------------------------------------------------===//
;//
;//                     The LLVM Compiler Infrastructure
;//
;// This file is dual licensed under the MIT and the University of Illinois Open
;// Source Licenses. See LICENSE.txt for details.
;//
;//===----------------------------------------------------------------------===//
;

        TITLE   z_Windows_NT-586_asm.asm

; ============================= IA-32 architecture ==========================
ifdef _M_IA32

        .586P

if @Version gt 510
        .model HUGE
else
_TEXT   SEGMENT PARA USE32 PUBLIC 'CODE'
_TEXT   ENDS
_DATA   SEGMENT DWORD USE32 PUBLIC 'DATA'
_DATA   ENDS
CONST   SEGMENT DWORD USE32 PUBLIC 'CONST'
CONST   ENDS
_BSS    SEGMENT DWORD USE32 PUBLIC 'BSS'
_BSS    ENDS
$$SYMBOLS       SEGMENT BYTE USE32 'DEBSYM'
$$SYMBOLS       ENDS
$$TYPES SEGMENT BYTE USE32 'DEBTYP'
$$TYPES ENDS
_TLS    SEGMENT DWORD USE32 PUBLIC 'TLS'
_TLS    ENDS
FLAT    GROUP _DATA, CONST, _BSS
        ASSUME  CS: FLAT, DS: FLAT, SS: FLAT
endif


;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_pause
;
; void
; __kmp_x86_pause( void )
;
; Emits the two-byte PAUSE encoding (F3 90) and returns.
PUBLIC  ___kmp_x86_pause
_p$ = 4                                 ; not referenced by this procedure
_d$ = 8                                 ; not referenced by this procedure
_TEXT   SEGMENT
        ALIGN 16
___kmp_x86_pause PROC NEAR

        db      0f3H, 090H              ;; pause
        ret

___kmp_x86_pause ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Executes CPUID with eax = mode and ecx = mode2, then stores the resulting
; eax, ebx, ecx, edx at offsets 0, 4, 8, 12 of *p.
PUBLIC  ___kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16
; stack arguments, relative to ebp
_mode$  = 8
_mode2$ = 12
_p$     = 16
; field offsets inside *p
_eax$   = 0
_ebx$   = 4
_ecx$   = 8
_edx$   = 12

___kmp_x86_cpuid PROC NEAR

        push    ebp
        mov     ebp, esp

        ; every register touched below except eax is saved and restored
        push    edi
        push    ebx
        push    ecx
        push    edx

        mov     ecx, DWORD PTR _mode2$[ebp]     ; sub-leaf selector
        mov     eax, DWORD PTR _mode$[ebp]      ; leaf selector
        cpuid                                   ; Query the CPUID for the current processor

        mov     edi, DWORD PTR _p$[ebp]         ; edi = result buffer
        mov     DWORD PTR _eax$[ edi ], eax
        mov     DWORD PTR _ebx$[ edi ], ebx
        mov     DWORD PTR _ecx$[ edi ], ecx
        mov     DWORD PTR _edx$[ edi ], edx

        pop     edx
        pop     ecx
        pop     ebx
        pop     edi

        mov     esp, ebp
        pop     ebp
        ret

___kmp_x86_cpuid ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Locked exchange-and-add: *p += d; the value *p held before the add is
; returned in eax.
PUBLIC  ___kmp_test_then_add32
_p$ = 4
_d$ = 8
_TEXT   SEGMENT
        ALIGN 16
___kmp_test_then_add32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     eax, DWORD PTR _d$[esp]         ; eax = d
lock    xadd    DWORD PTR [ecx], eax            ; eax receives the previous *p
        ret

___kmp_test_then_add32 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Locked compare-exchange of one byte.  Returns 1 in eax when *p matched cv
; (and sv was stored), 0 otherwise.
PUBLIC  ___kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store8 PROC NEAR

        mov     al, BYTE PTR _cv$[esp]          ; al = expected value
        mov     dl, BYTE PTR _sv$[esp]          ; dl = replacement value
        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
lock    cmpxchg BYTE PTR [ecx], dl
        sete    al                              ; al = 1 if al matched [ecx], else 0
        and     eax, 1                          ; keep bit 0 only: eax is exactly 0 or 1
        ret

___kmp_compare_and_store8 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Locked compare-exchange of one word.  Returns 1 in eax when *p matched cv
; (and sv was stored), 0 otherwise.
PUBLIC  ___kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store16 PROC NEAR

        mov     ax, WORD PTR _cv$[esp]          ; ax = expected value
        mov     dx, WORD PTR _sv$[esp]          ; dx = replacement value
        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
lock    cmpxchg WORD PTR [ecx], dx
        sete    al                              ; al = 1 if ax matched [ecx], else 0
        and     eax, 1                          ; keep bit 0 only: eax is exactly 0 or 1
        ret

___kmp_compare_and_store16 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Locked compare-exchange of one dword.  Returns 1 in eax when *p matched cv
; (and sv was stored), 0 otherwise.
PUBLIC  ___kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store32 PROC NEAR

        mov     eax, DWORD PTR _cv$[esp]        ; eax = expected value
        mov     edx, DWORD PTR _sv$[esp]        ; edx = replacement value
        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
lock    cmpxchg DWORD PTR [ecx], edx
        sete    al                              ; al = 1 if eax matched [ecx], else 0
        and     eax, 1                          ; keep bit 0 only: eax is exactly 0 or 1
        ret

___kmp_compare_and_store32 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Locked 8-byte compare-exchange (cmpxchg8b): edx:eax carries cv, ecx:ebx
; carries sv.  Returns 1 in eax when *p matched cv, 0 otherwise.
PUBLIC  ___kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16
; offsets are relative to ebp (a frame is built because ebx/edi are saved)
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx
        push    edi
        mov     edi, DWORD PTR _p$[ebp]         ; edi = p
        mov     eax, DWORD PTR _cv_low$[ebp]    ; edx:eax = cv
        mov     edx, DWORD PTR _cv_high$[ebp]
        mov     ebx, DWORD PTR _sv_low$[ebp]    ; ecx:ebx = sv
        mov     ecx, DWORD PTR _sv_high$[ebp]
lock    cmpxchg8b QWORD PTR [edi]
        sete    al                              ; al = 1 if edx:eax matched [edi], else 0
        and     eax, 1                          ; keep bit 0 only: eax is exactly 0 or 1
        pop     edi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret

___kmp_compare_and_store64 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
PUBLIC  ___kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

; Exchanges the byte at *p with d; the previous byte is returned in al.
___kmp_xchg_fixed8 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     al, BYTE PTR _d$[esp]           ; al = d
lock    xchg    BYTE PTR [ecx], al              ; al receives the previous *p
        ret

___kmp_xchg_fixed8 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Exchanges the word at *p with d; the previous word is returned in ax.
PUBLIC  ___kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed16 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     ax, WORD PTR _d$[esp]           ; ax = d
lock    xchg    WORD PTR [ecx], ax              ; ax receives the previous *p
        ret

___kmp_xchg_fixed16 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Exchanges the dword at *p with d; the previous dword is returned in eax.
PUBLIC  ___kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     eax, DWORD PTR _d$[esp]         ; eax = d
lock    xchg    DWORD PTR [ecx], eax            ; eax receives the previous *p
        ret

___kmp_xchg_fixed32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Exchanges the 32-bit float at *p with d and returns the previous value
; on the x87 stack in st(0).
;
; The returned value is the one produced by the xchg itself, so it is the
; value that was actually replaced.  Exactly one x87 push (the final fld)
; is performed, so no extra entry is left on the x87 register stack.
PUBLIC  ___kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_d$ = 12
_old_value$ = -4                                ; 4-byte local used to move eax to st(0)

___kmp_xchg_real32 PROC NEAR

        push    ebp
        mov     ebp, esp
        sub     esp, 4                          ; room for _old_value$
        push    esi
        mov     esi, DWORD PTR _p$[ebp]         ; esi = p

        mov     eax, DWORD PTR _d$[ebp]         ; eax = bit pattern of d

lock    xchg    DWORD PTR [esi], eax            ; eax = previous bit pattern of *p

        mov     DWORD PTR _old_value$[ebp], eax
        fld     DWORD PTR _old_value$[ebp]
                        ;; return old_value in st(0)
        pop     esi
        mov     esp, ebp
        pop     ebp
        ret

___kmp_xchg_real32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Locked byte compare-exchange.  Returns whatever cmpxchg leaves in al:
; cv when the store happened, otherwise the byte found at *p.
PUBLIC  ___kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret8 PROC NEAR

        mov     al, BYTE PTR _cv$[esp]          ; al = expected value
        mov     dl, BYTE PTR _sv$[esp]          ; dl = replacement value
        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
lock    cmpxchg BYTE PTR [ecx], dl
        ret

___kmp_compare_and_store_ret8 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Locked word compare-exchange.  Returns whatever cmpxchg leaves in ax:
; cv when the store happened, otherwise the word found at *p.
PUBLIC  ___kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret16 PROC NEAR

        mov     ax, WORD PTR _cv$[esp]          ; ax = expected value
        mov     dx, WORD PTR _sv$[esp]          ; dx = replacement value
        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
lock    cmpxchg WORD PTR [ecx], dx
        ret

___kmp_compare_and_store_ret16 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Locked dword compare-exchange.  Returns whatever cmpxchg leaves in eax:
; cv when the store happened, otherwise the dword found at *p.
PUBLIC  ___kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret32 PROC NEAR

        mov     eax, DWORD PTR _cv$[esp]        ; eax = expected value
        mov     edx, DWORD PTR _sv$[esp]        ; edx = replacement value
        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
lock    cmpxchg DWORD PTR [ecx], edx
        ret

___kmp_compare_and_store_ret32 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
PUBLIC  ___kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16
; offsets are relative to ebp (a frame is built because ebx/edi are saved)
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

; Locked 8-byte compare-exchange (cmpxchg8b): edx:eax carries cv, ecx:ebx
; carries sv.  Returns whatever cmpxchg8b leaves in edx:eax.
___kmp_compare_and_store_ret64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx
        push    edi
        mov     edi, DWORD PTR _p$[ebp]         ; edi = p
        mov     eax, DWORD PTR _cv_low$[ebp]    ; edx:eax = cv
        mov     edx, DWORD PTR _cv_high$[ebp]
        mov     ebx, DWORD PTR _sv_low$[ebp]    ; ecx:ebx = sv
        mov     ecx, DWORD PTR _sv_high$[ebp]
lock    cmpxchg8b QWORD PTR [edi]
        pop     edi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret

___kmp_compare_and_store_ret64 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; parameters:
;       p:      4(%esp)
;
; Loads the x87 control word from the 16-bit value at *p (fldcw).
PUBLIC  ___kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_load_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR _p$[esp]         ; eax = p
        fldcw   WORD PTR [eax]
        ret

___kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; parameters:
;       p:      4(%esp)
;
; Stores the current x87 control word into the 16-bit location *p (fstcw).
PUBLIC  ___kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_store_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR _p$[esp]         ; eax = p
        fstcw   WORD PTR [eax]
        ret

___kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word();
;
; Issues fnclex and returns.
PUBLIC  ___kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16

___kmp_clear_x87_fpu_status_word PROC NEAR

        fnclex
        ret

___kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION ___kmp_invoke_microtask
;
; typedef void (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] )
;
; Calls (*pkfn)( &gtid, &tid, p_argv[0], ..., p_argv[argc-1] ) with all
; arguments on the stack and esp 128-byte aligned at the call instruction.
; Always returns 1 in eax.
PUBLIC  ___kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN 16
; incoming arguments, relative to ebp
_pkfn$ = 8
_gtid$ = 12
_tid$ = 16
_argc$ = 20
_argv$ = 24
if OMPT_SUPPORT
_exit_frame$ = 28
endif
; locals inside the 16 bytes reserved below ebp
_i$ = -8                                ; countdown index over p_argv
_stk_adj$ = -16                         ; bytes to give back to esp after the call
_vptr$ = -12                            ; scratch: value about to be pushed
_qptr$ = -4                             ; scratch: &gtid

___kmp_invoke_microtask PROC NEAR
        push    ebp
        mov     ebp, esp
        sub     esp, 16                                 ; 00000010H
        push    ebx
        push    esi
        push    edi
if OMPT_SUPPORT
        mov     eax, DWORD PTR _exit_frame$[ebp]
        mov     DWORD PTR [eax], ebp                    ; *exit_frame = this frame's ebp
endif
        mov     eax, DWORD PTR _argc$[ebp]
        mov     DWORD PTR _i$[ebp], eax                 ; i = argc

;; ------------------------------------------------------------
;; Pad esp so that, once argc+2 dwords have been pushed, esp is a multiple
;; of 128.  The pad plus the argument bytes is remembered in _stk_adj$.
        lea     edx, DWORD PTR [eax*4+8]                ; edx = (argc+2)*4
        mov     ecx, esp                                ; ecx = current esp
        mov     eax, edx                                ; eax = argument bytes
        sub     ecx, edx                                ; ecx = esp after the pushes, unpadded
        mov     edx, ecx
        and     ecx, -128                               ; round that address down to 128
        sub     edx, ecx                                ; edx = padding needed
        sub     esp, edx                                ; apply the padding now

        add     edx, eax                                ; padding + argument bytes
        mov     DWORD PTR _stk_adj$[ebp], edx
;; ------------------------------------------------------------

        jmp     SHORT $kmp_arg_test
$kmp_arg_next:
        mov     ecx, DWORD PTR _i$[ebp]
        sub     ecx, 1
        mov     DWORD PTR _i$[ebp], ecx                 ; --i
$kmp_arg_test:
        cmp     DWORD PTR _i$[ebp], 0
        jle     SHORT $kmp_args_done                    ; stop once i <= 0
        mov     edx, DWORD PTR _i$[ebp]
        mov     eax, DWORD PTR _argv$[ebp]
        mov     ecx, DWORD PTR [eax+edx*4-4]            ; ecx = p_argv[i-1]
        mov     DWORD PTR _vptr$[ebp], ecx
        mov     eax, DWORD PTR _vptr$[ebp]
        push    eax                                     ; last argument is pushed first
        jmp     SHORT $kmp_arg_next
$kmp_args_done:
        lea     edx, DWORD PTR _tid$[ebp]
        mov     DWORD PTR _vptr$[ebp], edx              ; vptr = &tid
        lea     eax, DWORD PTR _gtid$[ebp]
        mov     DWORD PTR _qptr$[ebp], eax              ; qptr = &gtid
        mov     eax, DWORD PTR _vptr$[ebp]
        push    eax                                     ; 2nd microtask argument: &tid
        mov     eax, DWORD PTR _qptr$[ebp]
        push    eax                                     ; 1st microtask argument: &gtid
        call    DWORD PTR _pkfn$[ebp]
        add     esp, DWORD PTR _stk_adj$[ebp]           ; drop arguments and padding
        mov     eax, 1                                  ; return value
        pop     edi
        pop     esi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret     0
___kmp_invoke_microtask ENDP
_TEXT   ENDS

endif

; ==================================== Intel(R) 64 ===================================

ifdef _M_AMD64

;------------------------------------------------------------------------
; FUNCTION __kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; parameters:
;       mode:           ecx
;       mode2:          edx
;       cpuid_buffer:   r8
;
; Executes CPUID with eax = mode and ecx = mode2, then stores the resulting
; eax, ebx, ecx, edx at offsets 0, 4, 8, 12 of the buffer.
PUBLIC  __kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16

__kmp_x86_cpuid PROC FRAME ;NEAR

        push    rbp
        .pushreg rbp
        mov     rbp, rsp
        .setframe rbp, 0
        push    rbx                             ; callee-save register
        .pushreg rbx
        .ENDPROLOG

        mov     r10, r8                         ; r10 = p (result buffer)
        mov     eax, ecx                        ; eax = mode
        mov     ecx, edx                        ; ecx = mode2
        cpuid                                   ; Query the CPUID for the current processor

        mov     DWORD PTR 0[ r10 ], eax         ; write the four result registers
        mov     DWORD PTR 4[ r10 ], ebx
        mov     DWORD PTR 8[ r10 ], ecx
        mov     DWORD PTR 12[ r10 ], edx

        pop     rbx                             ; callee-save register
        mov     rsp, rbp
        pop     rbp
        ret

__kmp_x86_cpuid ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; parameters:
;       p:      rcx
;       d:      edx
;
; return:       eax  (value of *p before the add)
PUBLIC  __kmp_test_then_add32
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add32 PROC ;NEAR

        mov     eax, edx                        ; eax = d
lock    xadd    DWORD PTR [rcx], eax            ; eax receives the previous *p
        ret

__kmp_test_then_add32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add64
;
; kmp_int64
; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
;
; parameters:
;       p:      rcx
;       d:      rdx
;
; return:       rax  (value of *p before the add)
PUBLIC  __kmp_test_then_add64
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add64 PROC ;NEAR

        mov     rax, rdx                        ; rax = d
lock    xadd    QWORD PTR [rcx], rax            ; rax receives the previous *p
        ret

__kmp_test_then_add64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax  (1 when *p matched cv and sv was stored, else 0)
PUBLIC  __kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store8 PROC ;NEAR

        mov     al, dl                          ; al = "cv"
        mov     edx, r8d                        ; dl = "sv"
lock    cmpxchg BYTE PTR [rcx], dl
        sete    al                              ; al = 1 if al matched [rcx], else 0
        and     rax, 1                          ; keep bit 0 only: rax is exactly 0 or 1
        ret

__kmp_compare_and_store8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax  (1 when *p matched cv and sv was stored, else 0)
PUBLIC  __kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store16 PROC ;NEAR

        mov     ax, dx                          ; ax = "cv"
        mov     edx, r8d                        ; dx = "sv"
lock    cmpxchg WORD PTR [rcx], dx
        sete    al                              ; al = 1 if ax matched [rcx], else 0
        and     rax, 1                          ; keep bit 0 only: rax is exactly 0 or 1
        ret

__kmp_compare_and_store16 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax  (1 when *p matched cv and sv was stored, else 0)
PUBLIC  __kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store32 PROC ;NEAR

        mov     eax, edx                        ; eax = "cv"
        mov     edx, r8d                        ; edx = "sv"
lock    cmpxchg DWORD PTR [rcx], edx
        sete    al                              ; al = 1 if eax matched [rcx], else 0
        and     rax, 1                          ; keep bit 0 only: rax is exactly 0 or 1
        ret

__kmp_compare_and_store32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
; parameters:
;       p:      rcx
;       cv:     rdx
;       sv:     r8
;
; return:       eax  (1 when *p matched cv and sv was stored, else 0)
PUBLIC  __kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store64 PROC ;NEAR

        mov     rax, rdx                        ; rax = "cv"
        mov     rdx, r8                         ; rdx = "sv"
lock    cmpxchg QWORD PTR [rcx], rdx
        sete    al                              ; al = 1 if rax matched [rcx], else 0
        and     rax, 1                          ; keep bit 0 only: rax is exactly 0 or 1
        ret

__kmp_compare_and_store64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; parameters:
;       p:      rcx
;       d:      dl
;
; return:       al  (byte previously stored at *p)
PUBLIC  __kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed8 PROC ;NEAR

        mov     al, dl                          ; al = d
lock    xchg    BYTE PTR [rcx], al              ; al receives the previous *p
        ret

__kmp_xchg_fixed8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; parameters:
;       p:      rcx
;       d:      dx
;
; return:       ax  (word previously stored at *p)
PUBLIC  __kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed16 PROC ;NEAR

        mov     ax, dx                          ; ax = d
lock    xchg    WORD PTR [rcx], ax              ; ax receives the previous *p
        ret

__kmp_xchg_fixed16 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; parameters:
;       p:      rcx
;       d:      edx
;
; return:       eax  (dword previously stored at *p)
PUBLIC  __kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed32 PROC ;NEAR

        mov     eax, edx                        ; eax = d
lock    xchg    DWORD PTR [rcx], eax            ; eax receives the previous *p
        ret

__kmp_xchg_fixed32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed64
;
; kmp_int64
; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
;
; parameters:
;       p:      rcx
;       d:      rdx
;
; return:       rax  (qword previously stored at *p)
PUBLIC  __kmp_xchg_fixed64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed64 PROC ;NEAR

        mov     rax, rdx                        ; rax = d
lock    xchg    QWORD PTR [rcx], rax            ; rax receives the previous *p
        ret

__kmp_xchg_fixed64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax
PUBLIC  __kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret8 PROC ;NEAR
        mov     al, dl                          ; al = "cv"
        mov     edx, r8d                        ; dl = "sv"
lock    cmpxchg BYTE PTR [rcx], dl
                        ; Compare AL with [rcx].  If equal set
                        ; ZF and exchange DL with [rcx].  Else, clear
                        ; ZF and load [rcx] into AL.
        ret

__kmp_compare_and_store_ret8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax  (ax = cv when the store happened, else the word found at *p)
PUBLIC  __kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret16 PROC ;NEAR

        mov     ax, dx                          ; ax = "cv"
        mov     edx, r8d                        ; dx = "sv"
lock    cmpxchg WORD PTR [rcx], dx
        ret

__kmp_compare_and_store_ret16 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax  (cv when the store happened, else the dword found at *p)
PUBLIC  __kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret32 PROC ;NEAR

        mov     eax, edx                        ; eax = "cv"
        mov     edx, r8d                        ; edx = "sv"
lock    cmpxchg DWORD PTR [rcx], edx
        ret

__kmp_compare_and_store_ret32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
; parameters:
;       p:      rcx
;       cv:     rdx
;       sv:     r8
;
; return:       rax  (cv when the store happened, else the qword found at *p)
PUBLIC  __kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret64 PROC ;NEAR

        mov     rax, rdx                        ; rax = "cv"
        mov     rdx, r8                         ; rdx = "sv"
lock    cmpxchg QWORD PTR [rcx], rdx
        ret

__kmp_compare_and_store_ret64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_loop8
;
; kmp_int8
; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       al
;
; Retries the locked byte compare-exchange, with a PAUSE between attempts,
; until *p is found equal to cv and sv has been stored.
;
; "sv" is taken straight from r8b so that edx keeps holding "cv" for the
; whole loop; each retry must reload AL from DL because a failed cmpxchg
; overwrites AL with the byte it found at [rcx].
PUBLIC  __kmp_compare_and_store_loop8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_loop8 PROC ;NEAR
$__kmp_loop:
        mov     al, dl                          ; al = "cv" (dl is never modified here)
lock    cmpxchg BYTE PTR [rcx], r8b
                        ; Compare AL with [rcx].  If equal set
                        ; ZF and exchange R8B with [rcx].  Else, clear
                        ; ZF and load [rcx] into AL.
        jz      SHORT $__kmp_success

        db      0f3H
        db      090H                            ; pause

        jmp     SHORT $__kmp_loop

$__kmp_success:
        ret

__kmp_compare_and_store_loop8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; parameters:
;       p:      rcx
;       d:      xmm1 (lower 4 bytes)
;
; return:       xmm0 (lower 4 bytes)
PUBLIC  __kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real32 PROC ;NEAR

        movd    eax, xmm1                       ; eax = bit pattern of d

lock    xchg    DWORD PTR [rcx], eax            ; eax receives the previous bits of *p

        movd    xmm0, eax                       ; old value into the return register
        ret

__kmp_xchg_real32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real64
;
; kmp_real64
; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d );
;
; parameters:
;       p:      rcx
;       d:      xmm1 (lower 8 bytes)
;
; return:       xmm0 (lower 8 bytes)
PUBLIC  __kmp_xchg_real64
_TEXT   SEGMENT
        ALIGN 16
; Exchanges the 64-bit value at *p (rcx) with the low 8 bytes of xmm1 and
; returns the previous contents of *p in the low 8 bytes of xmm0.
__kmp_xchg_real64 PROC ;NEAR

        movd    rax, xmm1                       ; rax = bit pattern of "d"

lock    xchg    QWORD PTR [rcx], rax            ; rax receives the previous bits of *p

        movd    xmm0, rax                       ; old value into the return register
        ret

__kmp_xchg_real64 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION __kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; parameters:
;       p:      rcx
;
; Loads the x87 control word from the 16-bit value at *p (fldcw).
PUBLIC  __kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_load_x87_fpu_control_word PROC ;NEAR

        fldcw   WORD PTR [rcx]
        ret

__kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; parameters:
;       p:      rcx
;
; Stores the current x87 control word into the 16-bit location *p (fstcw).
PUBLIC  __kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_store_x87_fpu_control_word PROC ;NEAR

        fstcw   WORD PTR [rcx]
        ret

__kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word()
;
; Issues fnclex and returns.
PUBLIC  __kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_clear_x87_fpu_status_word PROC ;NEAR

        fnclex
        ret

__kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_invoke_microtask
;
; typedef void (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] ) {
;
;     (*pkfn) ( &gtid, &tid, argv[0], ... );
;     return 1;
; }
;
; note:
;      just before call to pkfn must have rsp 128-byte aligned for compiler
;
; parameters:
;      rcx:   pkfn      16[rbp]
;      edx:   gtid      24[rbp]
;      r8d:   tid       32[rbp]
;      r9d:   argc      40[rbp]
;      [st]:  p_argv    48[rbp]
;
; reg temps:
;      rax:   used all over the place
;      rdx:   used all over the place
;      rcx:   used as argument counter for push parms loop
;      r10:   used to hold pkfn function pointer argument
;
; return:      eax    (always 1/TRUE)
;
; argc is a 32-bit int, so only r9d is meaningful on entry.  r9 is
; zero-extended once after the prolog so the 64-bit arithmetic below (stack
; sizing and the p_argv index) never depends on the upper half of r9.
$_pkfn = 16
$_gtid = 24
$_tid = 32
$_argc = 40
$_p_argv = 48
if OMPT_SUPPORT
$_exit_frame = 56
endif

PUBLIC  __kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN 16

__kmp_invoke_microtask PROC FRAME ;NEAR
        mov     QWORD PTR 16[rsp], rdx  ; home gtid parameter
        mov     QWORD PTR 24[rsp], r8   ; home tid parameter
        push    rbp             ; save base pointer
        .pushreg rbp
        sub     rsp, 0          ; no fixed allocation necessary - end prolog

        lea     rbp, QWORD PTR [rsp]    ; establish the base pointer
        .setframe rbp, 0
        .ENDPROLOG
if OMPT_SUPPORT
        mov     rax, QWORD PTR $_exit_frame[rbp]
        mov     QWORD PTR [rax], rbp
endif
        mov     r10, rcx        ; save pkfn pointer for later
        mov     r9d, r9d        ; zero-extend argc: upper 32 bits of r9 are unspecified

;; ------------------------------------------------------------
        mov     rax, r9         ; rax <= argc
        cmp     rax, 2
        jge     SHORT $_kmp_invoke_stack_align
        mov     rax, 2          ; set 4 homes if less than 2 parms
$_kmp_invoke_stack_align:
        lea     rdx, QWORD PTR [rax*8+16] ; rdx <= (max(argc,2) + 2) * 8
        mov     rax, rsp        ; Save current SP into rax
        sub     rax, rdx        ; rsp - ((argc+2)*8) -> rax
                                ; without align, rsp would be this
        and     rax, -128       ; Mask off 7 bits (128-byte align)
        add     rax, rdx        ; add space for push's in a loop below
        mov     rsp, rax        ; Prepare the stack ptr
                                ; Now it will align to 128-byte at the call
;; ------------------------------------------------------------
                                ; setup pkfn parameter stack
        mov     rax, r9         ; rax <= argc
        shl     rax, 3          ; rax <= argc*8
        mov     rdx, QWORD PTR $_p_argv[rbp]    ; rdx <= p_argv
        add     rdx, rax        ; rdx <= &p_argv[argc]
        mov     rcx, r9         ; rcx <= argc
        jecxz   SHORT $_kmp_invoke_pass_parms   ; nothing to push if argc=0
        cmp     ecx, 1          ; if argc=1 branch ahead
        je      SHORT $_kmp_invoke_one_parm
        sub     ecx, 2          ; ecx <= argc-2 = number of parms that go on the stack
        je      SHORT $_kmp_invoke_two_parms    ; if argc=2 nothing goes on the stack

$_kmp_invoke_push_parms:        ; push last - 5th parms to pkfn on stack
        sub     rdx, 8          ; decrement p_argv pointer to previous parm
        mov     r8, QWORD PTR [rdx] ; r8 <= p_argv[rcx-1]
        push    r8              ; push p_argv[rcx-1] onto stack (reverse order)
        sub     ecx, 1
        jecxz   SHORT $_kmp_invoke_two_parms
        jmp     SHORT $_kmp_invoke_push_parms

$_kmp_invoke_two_parms:
        sub     rdx, 8          ; put 4th parm to pkfn in r9
        mov     r9, QWORD PTR [rdx] ; r9 <= p_argv[1]

$_kmp_invoke_one_parm:
        sub     rdx, 8          ; put 3rd parm to pkfn in r8
        mov     r8, QWORD PTR [rdx] ; r8 <= p_argv[0]

$_kmp_invoke_pass_parms:        ; put 1st & 2nd parms to pkfn in registers
        lea     rdx, QWORD PTR $_tid[rbp]  ; rdx <= &tid (2nd parm to pkfn)
        lea     rcx, QWORD PTR $_gtid[rbp] ; rcx <= &gtid (1st parm to pkfn)
        sub     rsp, 32         ; add stack space for first four parms
        mov     rax, r10        ; rax <= pkfn
        call    rax             ; call (*pkfn)()
        mov     rax, 1          ; move 1 into return register;

        lea     rsp, QWORD PTR [rbp]    ; restore stack pointer

;       add     rsp, 0          ; no fixed allocation necessary - start epilog
        pop     rbp             ; restore frame pointer
        ret
__kmp_invoke_microtask ENDP
_TEXT   ENDS

endif

END