divert(-1)

dnl  m4 macros for x86 assembler.

dnl  Copyright 1999-2003, 2007, 2010, 2012, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


dnl  Notes:
dnl
dnl  m4 isn't perfect for processing BSD style x86 assembler code, the main
dnl  problems are,
dnl
dnl  1. Doing define(foo,123) and then using foo in an addressing mode like
dnl     foo(%ebx) expands as a macro rather than a constant.  This is worked
dnl     around by using deflit() from asm-defs.m4, instead of define().
dnl
dnl  2. Immediates in macro definitions need a space or `' to stop the $
dnl     looking like a macro parameter.  For example,
dnl
dnl  	define(foo, `mov $ 123, %eax')
dnl
dnl     This is only a problem in macro definitions, not in ordinary text,
dnl     and not in macro parameters like text passed to forloop() or ifdef().


dnl  Size of a limb in bytes.  deflit() rather than define() so that a use
dnl  such as GMP_LIMB_BYTES(%ebx) keeps its parenthesized operand (see note 1
dnl  above).

deflit(`GMP_LIMB_BYTES', 4)


dnl  Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL.  We
dnl  undefine PIC since we don't need to be position independent in this
dnl  case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.

ifdef(`DLL_EXPORT',`undefine(`PIC')')


dnl  Usage: CPUVEC_FUNCS_LIST
dnl
dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
dnl  order they appear in that structure.  Each name is individually quoted
dnl  so the list can be passed on as macro arguments.

define(`CPUVEC_FUNCS_LIST',
``add_n',
`addlsh1_n',
`addlsh2_n',
`addmul_1',
`addmul_2',
`bdiv_dbm1c',
`cnd_add_n',
`cnd_sub_n',
`com',
`copyd',
`copyi',
`divexact_1',
`divrem_1',
`gcd_11',
`lshift',
`lshiftc',
`mod_1',
`mod_1_1p',
`mod_1_1p_cps',
`mod_1s_2p',
`mod_1s_2p_cps',
`mod_1s_4p',
`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
`redc_1',
`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
`sublsh1_n',
`submul_1'')


dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
dnl
dnl  Emits the GLOBL/TYPE/COFF_TYPE declarations and the entry label for a
dnl  function, followed by a profiling call selected by WANT_PROFILING
dnl  (call_mcount for `prof' and `gprof', call_instrument(enter) for
dnl  `instrument').
dnl
dnl  In the x86 code we use explicit TEXT and ALIGN() calls in the code,
dnl  since different alignments are wanted in various circumstances.  So for
dnl  instance,
dnl
dnl                  TEXT
dnl                  ALIGN(16)
dnl          PROLOGUE(mpn_add_n)
dnl          ...
dnl          EPILOGUE()

define(`PROLOGUE_cpu',
m4_assert_numargs(1)
m4_assert_defined(`WANT_PROFILING')
	`GLOBL	$1
	TYPE($1,`function')
	COFF_TYPE($1)
$1:
ifelse(WANT_PROFILING,`prof',      `	call_mcount')
ifelse(WANT_PROFILING,`gprof',     `	call_mcount')
ifelse(WANT_PROFILING,`instrument',`	call_instrument(enter)')
')


dnl  Usage: COFF_TYPE(GSYM_PREFIX`'foo)
dnl
dnl  Emit COFF style ".def ... .endef" type information for a function, when
dnl  supported.  The argument should include any GSYM_PREFIX.
dnl
dnl  See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
define(`COFF_TYPE',
m4_assert_numargs(1)
m4_assert_defined(`HAVE_COFF_TYPE')
`ifelse(HAVE_COFF_TYPE,yes,
	`.def	$1
	.scl	2
	.type	32
	.endef')')


dnl  Usage: call_mcount
dnl
dnl  Emit a call to mcount, as configured by the MCOUNT_* macros.  When the
dnl  applicable MCOUNT_PIC_REG / MCOUNT_NONPIC_REG is non-empty a 4-byte
dnl  zeroed data word is emitted too and its address is loaded into that
dnl  register before the call.
dnl
dnl  For `gprof' style profiling, %ebp is setup as a frame pointer.  None of
dnl  the assembler routines use %ebp this way, so it's done only for the
dnl  benefit of mcount.  glibc sysdeps/i386/i386-mcount.S shows how mcount
dnl  gets the current function from (%esp) and the parent from 4(%ebp).
dnl
dnl  For `prof' style profiling gcc generates mcount calls without setting
dnl  up %ebp, and the same is done here.
dnl
dnl  mcount_counter is incremented on each use so the generated labels are
dnl  unique.

define(`call_mcount',
m4_assert_numargs(-1)
m4_assert_defined(`WANT_PROFILING')
m4_assert_defined(`MCOUNT_PIC_REG')
m4_assert_defined(`MCOUNT_NONPIC_REG')
m4_assert_defined(`MCOUNT_PIC_CALL')
m4_assert_defined(`MCOUNT_NONPIC_CALL')
`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
`	DATA
	ALIGN(4)
L(mcount_data_`'mcount_counter):
	W32	0
	TEXT
')dnl
ifelse(WANT_PROFILING,`gprof',
`	pushl	%ebp
	movl	%esp, %ebp
')dnl
ifdef(`PIC',
`	pushl	%ebx
	call_movl_eip_to_ebx
L(mcount_here_`'mcount_counter):
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(mcount_here_`'mcount_counter)], %ebx
ifelse(MCOUNT_PIC_REG,,,
`	leal	L(mcount_data_`'mcount_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
MCOUNT_PIC_CALL
	popl	%ebx
',`dnl non-PIC
ifelse(MCOUNT_NONPIC_REG,,,
`	movl	`$'L(mcount_data_`'mcount_counter), MCOUNT_NONPIC_REG
')dnl
MCOUNT_NONPIC_CALL
')dnl
ifelse(WANT_PROFILING,`gprof',
`	popl	%ebp
')
define(`mcount_counter',incr(mcount_counter))
')

define(`mcount_counter',1)


dnl  Usage: call_instrument(enter|exit)
dnl
dnl  Call __cyg_profile_func_enter or __cyg_profile_func_exit.
dnl
dnl  For PIC, most routines don't require _GLOBAL_OFFSET_TABLE_ themselves
dnl  so %ebx is just setup for these calls.  It's a bit wasteful to repeat
dnl  the setup for the exit call having done it earlier for the enter, but
dnl  there's nowhere very convenient to hold %ebx through the length of a
dnl  routine, in general.
dnl
dnl  For PIC, because instrument_current_function will be within the current
dnl  object file we can get it just as an offset from %eip, there's no need
dnl  to use the GOT.
dnl
dnl  No attempt is made to maintain the stack alignment gcc generates with
dnl  -mpreferred-stack-boundary.  This wouldn't be hard, but it seems highly
dnl  unlikely the instrumenting functions would be doing anything that'd
dnl  benefit from alignment, in particular they're unlikely to be using
dnl  doubles or long doubles on the stack.
dnl
dnl  The FRAME scheme is used to conveniently account for the register saves
dnl  before accessing the return address.  Any previous value is saved and
dnl  restored, since plenty of code keeps a value across a "ret" in the
dnl  middle of a routine.
define(`call_instrument',
m4_assert_numargs(1)
`	pushdef(`FRAME',0)
ifelse($1,exit,
`	pushl	%eax	FRAME_pushl()	C return value
')
ifdef(`PIC',
`	pushl	%ebx	FRAME_pushl()
	call_movl_eip_to_ebx
L(instrument_here_`'instrument_count):
	movl	%ebx, %ecx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(instrument_here_`'instrument_count)], %ebx
	C use addl rather than leal to avoid old gas bugs, see mpn/x86/README
	addl	$instrument_current_function-L(instrument_here_`'instrument_count), %ecx
	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
	pushl	%ecx				FRAME_pushl()	C this function
	call	GSYM_PREFIX`'__cyg_profile_func_$1@PLT
	addl	$`'8, %esp
	popl	%ebx
',
`	C non-PIC
	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
	pushl	$instrument_current_function	FRAME_pushl()	C this function
	call	GSYM_PREFIX`'__cyg_profile_func_$1
	addl	$`'8, %esp
')
ifelse($1,exit,
`	popl	%eax			C return value
')
	popdef(`FRAME')
define(`instrument_count',incr(instrument_count))
')

dnl  Sequence number making the L(instrument_here_N) labels unique.
define(`instrument_count',1)


dnl  Usage: instrument_current_function
dnl
dnl  Return the current function name for instrumenting purposes.  This is
dnl  PROLOGUE_current_function, but it sticks at the first such name seen.
dnl
dnl  Sticking to the first name seen ensures that multiple-entrypoint
dnl  functions like mpn_add_nc and mpn_add_n will make enter and exit calls
dnl  giving the same function address.

define(`instrument_current_function',
m4_assert_numargs(-1)
`ifdef(`instrument_current_function_seen',
`instrument_current_function_seen',
`define(`instrument_current_function_seen',PROLOGUE_current_function)dnl
PROLOGUE_current_function')')


dnl  Usage: call_movl_eip_to_ebx
dnl
dnl  Generate a call to L(movl_eip_to_ebx), and record the need for that
dnl  routine.
define(`call_movl_eip_to_ebx',
m4_assert_numargs(-1)
`call	L(movl_eip_to_ebx)
define(`movl_eip_to_ebx_needed',1)')

dnl  Usage: generate_movl_eip_to_ebx
dnl
dnl  Emit a L(movl_eip_to_ebx) routine, if needed and not already generated.
dnl  The routine copies its own return address from (%esp) into %ebx.

define(`generate_movl_eip_to_ebx',
m4_assert_numargs(-1)
`ifelse(movl_eip_to_ebx_needed,1,
`ifelse(movl_eip_to_ebx_done,1,,
`L(movl_eip_to_ebx):
	movl	(%esp), %ebx
	ret_internal
define(`movl_eip_to_ebx_done',1)
')')')


dnl  Usage: ret
dnl
dnl  Generate a "ret", but if doing instrumented profiling then call
dnl  __cyg_profile_func_exit first.  Any pending L(movl_eip_to_ebx) routine
dnl  is emitted immediately after the return.

define(`ret',
m4_assert_numargs(-1)
m4_assert_defined(`WANT_PROFILING')
`ifelse(WANT_PROFILING,instrument,
`ret_instrument',
`ret_internal')
generate_movl_eip_to_ebx
')


dnl  Usage: ret_internal
dnl
dnl  A plain "ret", without any __cyg_profile_func_exit call.  This can be
dnl  used for a return which is internal to some function, such as when
dnl  getting %eip for PIC.
dnl
dnl  The result is double quoted so the emitted "ret" isn't expanded again
dnl  as the ret macro above.

define(`ret_internal',
m4_assert_numargs(-1)
``ret'')


dnl  Usage: ret_instrument
dnl
dnl  Generate call to __cyg_profile_func_exit and then a ret.  If a ret has
dnl  already been seen from this function then jump to that chunk of code,
dnl  rather than emitting it again.

define(`ret_instrument',
m4_assert_numargs(-1)
`ifelse(m4_unquote(ret_instrument_seen_`'instrument_current_function),1,
`jmp	L(instrument_exit_`'instrument_current_function)',
`define(ret_instrument_seen_`'instrument_current_function,1)
L(instrument_exit_`'instrument_current_function):
call_instrument(exit)
	ret_internal')')


dnl  Usage: _GLOBAL_OFFSET_TABLE_
dnl
dnl  Expand to _GLOBAL_OFFSET_TABLE_ plus any necessary underscore prefix.
dnl  This lets us write plain _GLOBAL_OFFSET_TABLE_ in SVR4 style, but still
dnl  work with systems requiring an extra underscore such as OpenBSD.
dnl
dnl  deflit is used so "leal _GLOBAL_OFFSET_TABLE_(%eax), %ebx" will come
dnl  out right, though that form doesn't work properly in gas (see
dnl  mpn/x86/README).

deflit(`_GLOBAL_OFFSET_TABLE_',
m4_assert_defined(`GOT_GSYM_PREFIX')
`GOT_GSYM_PREFIX`_GLOBAL_OFFSET_TABLE_'')


dnl  --------------------------------------------------------------------------
dnl  Various x86 macros.
dnl


dnl  Usage: ALIGN_OFFSET(bytes,offset)
dnl
dnl  Align to `offset' away from a multiple of `bytes'.  This is an
dnl  ALIGN(bytes) followed by `offset' many nop instructions.
dnl
dnl  This is useful for testing, for example align to something very strict
dnl  and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
dnl
dnl  Generally you wouldn't execute across the padding, but it's done with
dnl  nop's so it'll work.

define(`ALIGN_OFFSET',
m4_assert_numargs(2)
`ALIGN($1)
forloop(`i',1,$2,`	nop
')')


dnl  Usage: defframe(name,offset)
dnl
dnl  Make a definition like the following with which to access a parameter
dnl  or variable on the stack.
dnl
dnl         define(name,`FRAME+offset(%esp)')
dnl
dnl  Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
dnl  byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
dnl  Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
dnl  zero offset is wanted.
dnl
dnl  The new macro also gets a check that when it's used FRAME is actually
dnl  defined, and that the final %esp offset isn't negative, which would
dnl  mean an attempt to access something below the current %esp.
dnl
dnl  deflit() is used rather than a plain define(), so the new macro won't
dnl  delete any following parenthesized expression.  name(%edi) will come
dnl  out say as 16(%esp)(%edi).  This isn't valid assembler and should
dnl  provoke an error, which is better than silently giving just 16(%esp).
dnl
dnl  See README for more on the suggested way to access the stack frame.
define(`defframe',
m4_assert_numargs(2)
`deflit(`$1',
m4_assert_defined(`FRAME')
`defframe_check_notbelow(`$1',$2,FRAME)dnl
defframe_empty_if_zero(FRAME+($2))(%esp)')')

dnl  Called: defframe_empty_if_zero(expression)
dnl
dnl  Expand to the value of the expression, passing it through
dnl  m4_empty_if_zero unless defframe_empty_if_zero_disabled is 1.
define(`defframe_empty_if_zero',
m4_assert_numargs(1)
`ifelse(defframe_empty_if_zero_disabled,1,
`eval($1)',
`m4_empty_if_zero($1)')')

dnl  Called: defframe_check_notbelow(`name',offset,FRAME)
dnl
dnl  Raise an m4 error if FRAME+offset is negative.
define(`defframe_check_notbelow',
m4_assert_numargs(3)
`ifelse(eval(($3)+($2)<0),1,
`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
')')')


dnl  Usage: FRAME_pushl()
dnl         FRAME_popl()
dnl         FRAME_addl_esp(n)
dnl         FRAME_subl_esp(n)
dnl
dnl  Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
dnl  %esp of n bytes.
dnl
dnl  Using these macros is completely optional.  Sometimes it makes more
dnl  sense to put explicit deflit(`FRAME',N) forms, especially when there's
dnl  jumps and different sequences of FRAME values need to be used in
dnl  different places.

define(`FRAME_pushl',
m4_assert_numargs(0)
m4_assert_defined(`FRAME')
`deflit(`FRAME',eval(FRAME+4))')

define(`FRAME_popl',
m4_assert_numargs(0)
m4_assert_defined(`FRAME')
`deflit(`FRAME',eval(FRAME-4))')

define(`FRAME_addl_esp',
m4_assert_numargs(1)
m4_assert_defined(`FRAME')
`deflit(`FRAME',eval(FRAME-($1)))')

define(`FRAME_subl_esp',
m4_assert_numargs(1)
m4_assert_defined(`FRAME')
`deflit(`FRAME',eval(FRAME+($1)))')


dnl  Usage: defframe_pushl(name)
dnl
dnl  Do a combination FRAME_pushl() and a defframe() to name the stack
dnl  location just pushed.  This should come after a pushl instruction.
dnl  Putting it on the same line works and avoids lengthening the code.  For
dnl  example,
dnl
dnl         pushl   %eax     defframe_pushl(VAR_COUNTER)
dnl
dnl  Notice the defframe() is done with an unquoted -FRAME thus giving its
dnl  current value without tracking future changes.

define(`defframe_pushl',
m4_assert_numargs(1)
`FRAME_pushl()defframe(`$1',-FRAME)')


dnl  --------------------------------------------------------------------------
dnl  Assembler instruction macros.
dnl


dnl  Usage: emms_or_femms
dnl         femms_available_p
dnl
dnl  femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
dnl  femms instruction is available.  emms_or_femms expands to femms if
dnl  available, or emms if not.
dnl
dnl  emms_or_femms is meant for use in the K6 directory where plain K6
dnl  (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
dnl  supported together.
dnl
dnl  On K7 femms is no longer faster and is just an alias for emms, so plain
dnl  emms may as well be used.

define(`femms_available_p',
m4_assert_numargs(-1)
`m4_ifdef_anyof_p(
	`HAVE_HOST_CPU_k62',
	`HAVE_HOST_CPU_k63',
	`HAVE_HOST_CPU_athlon')')

define(`emms_or_femms',
m4_assert_numargs(-1)
`ifelse(femms_available_p,1,`femms',`emms')')


dnl  Usage: femms
dnl
dnl  Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the
dnl  following is a replacement using .byte.

define(`femms',
m4_assert_numargs(-1)
`.byte	15,14	C AMD 3DNow femms')


dnl  Usage: jadcl0(op)
dnl
dnl  Generate a jnc/incl as a substitute for adcl $0,op.  Note this isn't an
dnl  exact replacement, since it doesn't set the flags like adcl does.
dnl
dnl  This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
dnl  mpn_sqr_basecase because on K6 an adcl is slow, the branch
dnl  misprediction penalty is small, and the multiply algorithm used leads
dnl  to a carry bit on average only 1/4 of the time.
dnl
dnl  jadcl0_disabled can be set to 1 to instead generate an ordinary adcl
dnl  for comparison.  For example,
dnl
dnl		define(`jadcl0_disabled',1)
dnl
dnl  When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
dnl  the same size as an adcl.  This makes it possible to use the exact same
dnl  computed jump code when testing the relative speed of the two.

define(`jadcl0',
m4_assert_numargs(1)
`ifelse(jadcl0_disabled,1,
	`adcl	$`'0, $1',
	`jnc	L(jadcl0_`'jadcl0_counter)
	incl	$1
L(jadcl0_`'jadcl0_counter):
define(`jadcl0_counter',incr(jadcl0_counter))')')

dnl  Sequence number making the L(jadcl0_N) labels unique.
define(`jadcl0_counter',1)


dnl  Usage: x86_lookup(target, key,value, key,value, ...)
dnl         x86_lookup_p(target, key,value, key,value, ...)
dnl
dnl  Look for `target' among the `key' parameters.
dnl
dnl  x86_lookup expands to the corresponding `value', or generates an error
dnl  if `target' isn't found.
dnl
dnl  x86_lookup_p expands to 1 if `target' is found, or 0 if not.
dnl
dnl  Both work recursively, dropping the leading key,value pair on each
dnl  step; fewer than three arguments left means the list is exhausted.

define(`x86_lookup',
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
`m4_error(`unrecognised part of x86 instruction: $1
')',
`ifelse(`$1',`$2', `$3',
`x86_lookup(`$1',shift(shift(shift($@))))')')')

define(`x86_lookup_p',
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1, `0',
`ifelse(`$1',`$2',      `1',
`x86_lookup_p(`$1',shift(shift(shift($@))))')')')


dnl  Usage: x86_opcode_reg32(reg)
dnl         x86_opcode_reg32_p(reg)
dnl
dnl  x86_opcode_reg32 expands to the standard 3 bit encoding for the given
dnl  32-bit register, eg. `%ebp' turns into 5.
dnl
dnl  x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
dnl  if not.
define(`x86_opcode_reg32',
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_reg32_list)')

define(`x86_opcode_reg32_p',
m4_assert_onearg()
`x86_lookup_p(`$1',x86_opcode_reg32_list)')

dnl  key,value pairs for x86_lookup: 32-bit register name, 3-bit encoding.
define(`x86_opcode_reg32_list',
``%eax',0,
`%ecx',1,
`%edx',2,
`%ebx',3,
`%esp',4,
`%ebp',5,
`%esi',6,
`%edi',7')


dnl  Usage: x86_opcode_tttn(cond)
dnl
dnl  Expand to the 4-bit "tttn" field value for the given x86 branch
dnl  condition (like `c', `ae', etc).
dnl
dnl  The lookup goes through x86_opcode_tttn_list below.  (An earlier
dnl  spelling `x86_opcode_ttn_list' named a macro that doesn't exist, which
dnl  left x86_lookup with no key,value pairs and so made every condition be
dnl  rejected as unrecognised.)

define(`x86_opcode_tttn',
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_tttn_list)')

dnl  key,value pairs for x86_lookup: condition suffix, tttn value.  Synonyms
dnl  for the same condition share a value.  This list is also what
dnl  define_cmov_many is given to create the cmovCC macros.
define(`x86_opcode_tttn_list',
``o',  0,
`no',  1,
`b',   2, `c',  2,  `nae',2,
`nb',  3, `nc', 3,  `ae', 3,
`e',   4, `z',  4,
`ne',  5, `nz', 5,
`be',  6, `na', 6,
`nbe', 7, `a',  7,
`s',   8,
`ns',  9,
`p',  10, `pe', 10, `npo',10,
`np', 11, `npe',11, `po', 11,
`l',  12, `nge',12,
`nl', 13, `ge', 13,
`le', 14, `ng', 14,
`nle',15, `g',  15')


dnl  Usage: cmovCC(%srcreg,%dstreg)
dnl
dnl  Emit a cmov instruction, using a .byte sequence, since various past
dnl  versions of gas don't know cmov.  For example,
dnl
dnl         cmovz(  %eax, %ebx)
dnl
dnl  The source operand can only be a plain register.  (m4 code implementing
dnl  full memory addressing modes exists, believe it or not, but isn't
dnl  currently needed and isn't included.)
dnl
dnl  All the standard conditions are defined.  Attempting to use one without
dnl  the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
dnl  an error.  This protects against writing something old gas wouldn't
dnl  understand.

dnl  Called: define_cmov_many(cond,tttn,cond,tttn,...)
define(`define_cmov_many',
`ifelse(m4_length(`$1'),0,,
`define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')

dnl  Called: define_cmov(cond,tttn)
dnl  Emit basically define(cmov<cond>,`cmov_internal(<cond>,<tttn>,`$1',`$2')')
define(`define_cmov',
m4_assert_numargs(2)
`define(`cmov$1',
m4_instruction_wrapper()
m4_assert_numargs(2)
`cmov_internal'(m4_doublequote($`'0),``$2'',dnl
m4_doublequote($`'1),m4_doublequote($`'2)))')

define_cmov_many(x86_opcode_tttn_list)

dnl  Called: cmov_internal(name,tttn,src,dst)
dnl
dnl  Emits the three bytes 15, 64+tttn, and a register-to-register ModRM
dnl  byte 192+8*dst+src, with the intended instruction as a trailing comment.
define(`cmov_internal',
m4_assert_numargs(4)
`.byte	dnl
15, dnl
eval(64+$2), dnl
eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
	C `$1 $3, $4'')


dnl  Usage: x86_opcode_regmmx(reg)
dnl
dnl  Validate the given mmx register, and return its number, 0 to 7.

define(`x86_opcode_regmmx',
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_regmmx_list)')

define(`x86_opcode_regmmx_list',
``%mm0',0,
`%mm1',1,
`%mm2',2,
`%mm3',3,
`%mm4',4,
`%mm5',5,
`%mm6',6,
`%mm7',7')


dnl  Usage: psadbw(%srcreg,%dstreg)
dnl
dnl  Oldish versions of gas don't know psadbw, in particular gas 2.9.1 on
dnl  FreeBSD 3.3 and 3.4 doesn't, so instead emit .byte sequences.  For
dnl  example,
dnl
dnl         psadbw( %mm1, %mm2)
dnl
dnl  Only register->register forms are supported here, which suffices for
dnl  the current code.

define(`psadbw',
m4_instruction_wrapper()
m4_assert_numargs(2)
`.byte 0x0f,0xf6,dnl
eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
	C `psadbw $1, $2'')


dnl  Usage: Zdisp(inst,op,op,op)
dnl
dnl  Generate explicit .byte sequences if necessary to force a byte-sized
dnl  zero displacement on an instruction.  For example,
dnl
dnl         Zdisp(  movl,   0,(%esi), %eax)
dnl
dnl  expands to
dnl
dnl                 .byte   0x8b,0x46,0x00  C movl 0(%esi), %eax
dnl
dnl  If the displacement given isn't 0, then normal assembler code is
dnl  generated.  For example,
dnl
dnl         Zdisp(  movl,   4,(%esi), %eax)
dnl
dnl  expands to
dnl
dnl                 movl    4(%esi), %eax
dnl
dnl  This means a single Zdisp() form can be used with an expression for the
dnl  displacement, and .byte will be used only if necessary.  The
dnl  displacement argument is eval()ed.
dnl
dnl  Because there aren't many places a 0(reg) form is wanted, Zdisp is
dnl  implemented with a table of instructions and encodings.  A new entry is
dnl  needed for any different operation or registers.  The table is split
dnl  into separate macros to avoid overflowing BSD m4 macro expansion space.

define(`Zdisp',
m4_assert_numargs(4)
`define(`Zdisp_found',0)dnl
Zdisp_1($@)dnl
Zdisp_2($@)dnl
Zdisp_3($@)dnl
Zdisp_4($@)dnl
ifelse(Zdisp_found,0,
`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
')')')

dnl  Table part 1: adcl, addl, sbbl, subl, movzbl, adc, sbb.
define(`Zdisp_1',`dnl
Zdisp_match( adcl,   0,(%edx), %eax,        `0x13,0x42,0x00',           $@)`'dnl
Zdisp_match( adcl,   0,(%edx), %ebx,        `0x13,0x5a,0x00',           $@)`'dnl
Zdisp_match( adcl,   0,(%edx), %esi,        `0x13,0x72,0x00',           $@)`'dnl
Zdisp_match( addl,   %ebx, 0,(%edi),        `0x01,0x5f,0x00',           $@)`'dnl
Zdisp_match( addl,   %ecx, 0,(%edi),        `0x01,0x4f,0x00',           $@)`'dnl
Zdisp_match( addl,   %esi, 0,(%edi),        `0x01,0x77,0x00',           $@)`'dnl
Zdisp_match( sbbl,   0,(%edx), %eax,        `0x1b,0x42,0x00',           $@)`'dnl
Zdisp_match( sbbl,   0,(%edx), %esi,        `0x1b,0x72,0x00',           $@)`'dnl
Zdisp_match( subl,   %ecx, 0,(%edi),        `0x29,0x4f,0x00',           $@)`'dnl
Zdisp_match( movzbl, 0,(%eax,%ebp), %eax,   `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl
Zdisp_match( movzbl, 0,(%ecx,%edi), %edi,   `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl
Zdisp_match( adc,    0,(%ebx,%ecx,4), %eax, `0x13,0x44,0x8b,0x00',      $@)`'dnl
Zdisp_match( sbb,    0,(%ebx,%ecx,4), %eax, `0x1b,0x44,0x8b,0x00',      $@)`'dnl
')
dnl  Table part 2: movl, mov.
define(`Zdisp_2',`dnl
Zdisp_match( movl,   %eax, 0,(%edi),        `0x89,0x47,0x00',           $@)`'dnl
Zdisp_match( movl,   %ebx, 0,(%edi),        `0x89,0x5f,0x00',           $@)`'dnl
Zdisp_match( movl,   %esi, 0,(%edi),        `0x89,0x77,0x00',           $@)`'dnl
Zdisp_match( movl,   0,(%ebx), %eax,        `0x8b,0x43,0x00',           $@)`'dnl
Zdisp_match( movl,   0,(%ebx), %esi,        `0x8b,0x73,0x00',           $@)`'dnl
Zdisp_match( movl,   0,(%edx), %eax,        `0x8b,0x42,0x00',           $@)`'dnl
Zdisp_match( movl,   0,(%esi), %eax,        `0x8b,0x46,0x00',           $@)`'dnl
Zdisp_match( movl,   0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00',      $@)`'dnl
Zdisp_match( mov,    0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00',      $@)`'dnl
Zdisp_match( mov,    %eax, 0,(%edi,%ecx,4), `0x89,0x44,0x8f,0x00',      $@)`'dnl
')
dnl  Table part 3: movq.
define(`Zdisp_3',`dnl
Zdisp_match( movq,   0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
Zdisp_match( movq,   0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
Zdisp_match( movq,   0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
Zdisp_match( movq,   0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl
Zdisp_match( movq,   0,(%edx), %mm0,        `0x0f,0x6f,0x42,0x00',      $@)`'dnl
Zdisp_match( movq,   0,(%esi), %mm0,        `0x0f,0x6f,0x46,0x00',      $@)`'dnl
Zdisp_match( movq,   %mm0, 0,(%edi),        `0x0f,0x7f,0x47,0x00',      $@)`'dnl
Zdisp_match( movq,   %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
Zdisp_match( movq,   %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
Zdisp_match( movq,   %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
')
dnl  Table part 4: movd.
define(`Zdisp_4',`dnl
Zdisp_match( movd,   0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl
Zdisp_match( movd,   0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
Zdisp_match( movd,   0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
Zdisp_match( movd,   %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
Zdisp_match( movd,   %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
Zdisp_match( movd,   %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
Zdisp_match( movd,   %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl
')

dnl  Called: Zdisp_match(inst,op,op,op, bytes, inst,op,op,op)
dnl
dnl  $1..$4 is a table entry with a literal 0 in the displacement position,
dnl  $5 its encoding, and $6..$9 the operands given to Zdisp.  The first
dnl  ifelse handles entries whose displacement is the second parameter
dnl  (memory source), the second those where it's the third (memory
dnl  destination).  On a match Zdisp_found is set and either the .byte form
dnl  or the plain instruction is emitted, according to whether the caller's
dnl  displacement evaluates to 0.
define(`Zdisp_match',
m4_assert_numargs(9)
`ifelse(eval(m4_stringequal_p(`$1',`$6')
	&& m4_stringequal_p(`$2',0)
	&& m4_stringequal_p(`$3',`$8')
	&& m4_stringequal_p(`$4',`$9')),1,
`define(`Zdisp_found',1)dnl
ifelse(eval(`$7'),0,
`	.byte	$5  C `$1 0$3, $4'',
`	$6	$7$8, $9')',

`ifelse(eval(m4_stringequal_p(`$1',`$6')
	&& m4_stringequal_p(`$2',`$7')
	&& m4_stringequal_p(`$3',0)
	&& m4_stringequal_p(`$4',`$9')),1,
`define(`Zdisp_found',1)dnl
ifelse(eval(`$8'),0,
`	.byte	$5  C `$1 $2, 0$4'',
`	$6	$7, $8$9')')')')


dnl  Usage: shldl(count,src,dst)
dnl         shrdl(count,src,dst)
dnl         shldw(count,src,dst)
dnl         shrdw(count,src,dst)
dnl
dnl  Generate a double-shift instruction, possibly omitting a %cl count
dnl  parameter if that's what the assembler requires, as indicated by
dnl  WANT_SHLDL_CL in config.m4.  For example,
dnl
dnl         shldl(  %cl, %eax, %ebx)
dnl
dnl  turns into either
dnl
dnl         shldl   %cl, %eax, %ebx
dnl  or
dnl         shldl   %eax, %ebx
dnl
dnl  Immediate counts are always passed through unchanged.  For example,
dnl
dnl         shrdl(  $2, %esi, %edi)
dnl  becomes
dnl         shrdl   $2, %esi, %edi
dnl
dnl
dnl  If you forget to use the macro form "shldl( ...)" and instead write
dnl  just a plain "shldl ...", an error results.  This ensures the necessary
dnl  variant treatment of %cl isn't accidentally bypassed.
dnl  Called: define_shd_instruction(name)
dnl
dnl  Create the instruction macro `name', which passes its own name and its
dnl  three arguments on to shd_instruction.
define(`define_shd_instruction',
m4_assert_numargs(1)
`define(`$1',
m4_instruction_wrapper()
m4_assert_numargs(3)
`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
m4_doublequote($`'2),m4_doublequote($`'3)))')

dnl  Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
define_shd_instruction(shldl)
define_shd_instruction(shrdl)
define_shd_instruction(shldw)
define_shd_instruction(shrdw)

dnl  Called: shd_instruction(op,count,src,dst)
dnl
dnl  The count is dropped only when it's exactly %cl and WANT_SHLDL_CL is 0.
define(`shd_instruction',
m4_assert_numargs(4)
m4_assert_defined(`WANT_SHLDL_CL')
`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
``$1'	`$3', `$4'',
``$1'	`$2', `$3', `$4'')')


dnl  Usage: ASSERT([cond][,instructions])
dnl
dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
dnl  flags condition to then be satisfied.  For example,
dnl
dnl         ASSERT(ne, `cmpl %eax, %ebx')
dnl
dnl  The instructions can be omitted to just assert a flags condition with
dnl  no extra calculation.  For example,
dnl
dnl         ASSERT(nc)
dnl
dnl  When `instructions' is not empty, a pushf/popf is added to preserve the
dnl  flags, but the instructions themselves must preserve any registers that
dnl  matter.  FRAME is adjusted for the push and pop, so the instructions
dnl  given can use defframe() stack variables.
dnl
dnl  The condition can be omitted to just output the given instructions when
dnl  assertion checking is wanted.  In this case the pushf/popf is omitted.
dnl  For example,
dnl
dnl         ASSERT(, `movl %eax, VAR_KEEPVAL')
dnl
dnl  A failed assertion executes ud2.  ASSERT_counter makes the
dnl  L(ASSERT_okN) labels unique.

define(`ASSERT',
m4_assert_numargs_range(1,2)
m4_assert_defined(`WANT_ASSERT')
`ifelse(WANT_ASSERT,1,
`ifelse(`$1',,
	`$2',
	`C ASSERT
ifelse(`$2',,,`	pushf	ifdef(`FRAME',`FRAME_pushl()')')
	$2
	j`$1'	L(ASSERT_ok`'ASSERT_counter)
	ud2	C assertion failed
L(ASSERT_ok`'ASSERT_counter):
ifelse(`$2',,,`	popf	ifdef(`FRAME',`FRAME_popl()')')
define(`ASSERT_counter',incr(ASSERT_counter))')')')

define(`ASSERT_counter',1)


dnl  Usage: movl_text_address(label,register)
dnl
dnl  Get the address of a text segment label, using either a plain movl or a
dnl  position-independent calculation, as necessary.  For example,
dnl
dnl         movl_text_address(L(foo),%eax)
dnl
dnl  This macro is only meant for use in ASSERT()s or when testing, since
dnl  the PIC sequence it generates will want to be done with a ret balancing
dnl  the call on CPUs with return address branch prediction.
dnl
dnl  The addl generated here has a backward reference to the label, and so
dnl  won't suffer from the two forwards references bug in old gas (described
dnl  in mpn/x86/README).

define(`movl_text_address',
m4_assert_numargs(2)
`ifdef(`PIC',
	`call	L(movl_text_address_`'movl_text_address_counter)
L(movl_text_address_`'movl_text_address_counter):
	popl	$2	C %eip
	addl	`$'$1-L(movl_text_address_`'movl_text_address_counter), $2
define(`movl_text_address_counter',incr(movl_text_address_counter))',
	`movl	`$'$1, $2')')

define(`movl_text_address_counter',1)


dnl  Usage: notl_or_xorl_GMP_NUMB_MASK(reg)
dnl
dnl  Expand to either "notl `reg'" or "xorl $GMP_NUMB_MASK,`reg'" as
dnl  appropriate for nails in use or not.
define(`notl_or_xorl_GMP_NUMB_MASK',
m4_assert_numargs(1)
`ifelse(GMP_NAIL_BITS,0,
`notl	`$1'',
`xorl	$GMP_NUMB_MASK, `$1'')')


dnl  Usage: LEA(symbol,reg)
dnl         LEAL(symbol_local_to_file,reg)
dnl
dnl  Load the address of a symbol into reg.
dnl
dnl  Non-PIC this is just a movl of the symbol as an immediate.
dnl
dnl  For PIC, a L(movl_eip_<reg>) routine is called to fetch the return
dnl  address into reg, _GLOBAL_OFFSET_TABLE_ is added to that, and then LEA
dnl  loads the address from the symbol's @GOT slot whereas LEAL forms it
dnl  directly with a @GOTOFF leal.  The text of the helper routine is
dnl  appended to load_eip the first time a given register is used (detected
dnl  by searching load_eip for the register name), and is only actually
dnl  emitted where ASM_END is used.

define(`LEA',
m4_assert_numargs(2)
`ifdef(`PIC',`dnl
ifelse(index(defn(`load_eip'), `$2'),-1,
`m4append(`load_eip',
`	TEXT
	ALIGN(16)
L(movl_eip_`'substr($2,1)):
	movl	(%esp), $2
	ret_internal
')')dnl
	call	L(movl_eip_`'substr($2,1))
	addl	$_GLOBAL_OFFSET_TABLE_, $2
	movl	$1@GOT($2), $2
',`
	movl	`$'$1, $2
')')

define(`LEAL',
m4_assert_numargs(2)
`ifdef(`PIC',`dnl
ifelse(index(defn(`load_eip'), `$2'),-1,
`m4append(`load_eip',
`	TEXT
	ALIGN(16)
L(movl_eip_`'substr($2,1)):
	movl	(%esp), $2
	ret_internal
')')dnl
	call	L(movl_eip_`'substr($2,1))
	addl	$_GLOBAL_OFFSET_TABLE_, $2
	leal	$1@GOTOFF($2), $2
',`
	movl	`$'$1, $2
')')

dnl  Usage: ASM_END
dnl
dnl  Expands to load_eip, ie. the L(movl_eip_<reg>) routines accumulated by
dnl  LEA and LEAL, or nothing if there were none.

define(`ASM_END',`load_eip')

define(`load_eip', `')		dnl updated in LEA/LEAL


dnl  Usage: DEF_OBJECT(name[,align])
dnl         END_OBJECT(name)
dnl
dnl  DEF_OBJECT switches to RODATA, aligns to `align' (2 when only the name
dnl  is given) and emits the label.  END_OBJECT emits a SIZE() for the
dnl  object, measured from the label to the current location.

define(`DEF_OBJECT',
m4_assert_numargs_range(1,2)
	`RODATA
	ALIGN(ifelse($#,1,2,$2))
$1:
')

define(`END_OBJECT',
m4_assert_numargs(1)
`	SIZE(`$1',.-`$1')')

dnl  Usage: CALL(funcname)
dnl
dnl  Call GSYM_PREFIX`'funcname, going through the PLT when PIC.

define(`CALL',
m4_assert_numargs(1)
`ifdef(`PIC',
  `call	GSYM_PREFIX`'$1@PLT',
  `call	GSYM_PREFIX`'$1')')

dnl  PIC_WITH_EBX is defined exactly when PIC is.

ifdef(`PIC',
`define(`PIC_WITH_EBX')',
`undefine(`PIC_WITH_EBX')')

divert`'dnl