1divert(-1)
2
3dnl  m4 macros for x86 assembler.
4
5dnl  Copyright 1999-2003, 2007, 2010, 2012, 2014 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of either:
11dnl
12dnl    * the GNU Lesser General Public License as published by the Free
13dnl      Software Foundation; either version 3 of the License, or (at your
14dnl      option) any later version.
15dnl
16dnl  or
17dnl
18dnl    * the GNU General Public License as published by the Free Software
19dnl      Foundation; either version 2 of the License, or (at your option) any
20dnl      later version.
21dnl
22dnl  or both in parallel, as here.
23dnl
24dnl  The GNU MP Library is distributed in the hope that it will be useful, but
25dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27dnl  for more details.
28dnl
29dnl  You should have received copies of the GNU General Public License and the
30dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
31dnl  see https://www.gnu.org/licenses/.
32
33
34dnl  Notes:
35dnl
36dnl  m4 isn't perfect for processing BSD style x86 assembler code, the main
37dnl  problems are,
38dnl
39dnl  1. Doing define(foo,123) and then using foo in an addressing mode like
40dnl     foo(%ebx) expands as a macro rather than a constant.  This is worked
41dnl     around by using deflit() from asm-defs.m4, instead of define().
42dnl
43dnl  2. Immediates in macro definitions need a space or `' to stop the $
44dnl     looking like a macro parameter.  For example,
45dnl
46dnl	        define(foo, `mov $ 123, %eax')
47dnl
48dnl     This is only a problem in macro definitions, not in ordinary text,
49dnl     and not in macro parameters like text passed to forloop() or ifdef().
50
51
dnl  GMP_LIMB_BYTES is the literal 4 (going by the name, the size of a limb
dnl  in bytes).  It's a deflit() so that it stays a constant when followed by
dnl  a parenthesized operand, as per note 1 above.
deflit(GMP_LIMB_BYTES, 4)
53
54
dnl  Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL.  We
dnl  undefine PIC since we don't need to be position independent in this
dnl  case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.
dnl
dnl  With PIC undefined here, the ifdef(`PIC',...) tests in the macros below
dnl  take their non-PIC branches in such a build.

ifdef(`DLL_EXPORT',`undefine(`PIC')')
60
61
62dnl  Usage: CPUVEC_FUNCS_LIST
63dnl
64dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
65dnl  order they appear in that structure.
66
dnl  The list is given inside an extra level of quotes, so the stored
dnl  definition is a comma separated list of names each still individually
dnl  quoted.
define(CPUVEC_FUNCS_LIST,
``add_n',
`addlsh1_n',
`addlsh2_n',
`addmul_1',
`addmul_2',
`bdiv_dbm1c',
`cnd_add_n',
`cnd_sub_n',
`com',
`copyd',
`copyi',
`divexact_1',
`divrem_1',
`gcd_11',
`lshift',
`lshiftc',
`mod_1',
`mod_1_1p',
`mod_1_1p_cps',
`mod_1s_2p',
`mod_1s_2p_cps',
`mod_1s_4p',
`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
`redc_1',
`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
`sublsh1_n',
`submul_1'')
105
106
107dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
108dnl
109dnl  In the x86 code we use explicit TEXT and ALIGN() calls in the code,
110dnl  since different alignments are wanted in various circumstances.  So for
111dnl  instance,
112dnl
113dnl                  TEXT
114dnl                  ALIGN(16)
115dnl          PROLOGUE(mpn_add_n)
116dnl          ...
117dnl          EPILOGUE()
118
dnl  Emits GLOBL, TYPE and COFF_TYPE for the symbol, then the label itself,
dnl  and then a call_mcount when WANT_PROFILING is `prof' or `gprof', or a
dnl  call_instrument(enter) when it's `instrument'.  WANT_PROFILING must be
dnl  defined.
define(`PROLOGUE_cpu',
m4_assert_numargs(1)
m4_assert_defined(`WANT_PROFILING')
	`GLOBL	$1
	TYPE($1,`function')
	COFF_TYPE($1)
$1:
ifelse(WANT_PROFILING,`prof',      `	call_mcount')
ifelse(WANT_PROFILING,`gprof',     `	call_mcount')
ifelse(WANT_PROFILING,`instrument',`	call_instrument(enter)')
')
130
131
132dnl  Usage: COFF_TYPE(GSYM_PREFIX`'foo)
133dnl
134dnl  Emit COFF style ".def ... .endef" type information for a function, when
135dnl  supported.  The argument should include any GSYM_PREFIX.
136dnl
137dnl  See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
138
dnl  Expands to nothing unless HAVE_COFF_TYPE is `yes'.  NOTE(review): .scl 2
dnl  and .type 32 look like the COFF external storage class and function
dnl  type codes, confirm against the COFF documentation.
define(COFF_TYPE,
m4_assert_numargs(1)
m4_assert_defined(`HAVE_COFF_TYPE')
`ifelse(HAVE_COFF_TYPE,yes,
	`.def	$1
	.scl	2
	.type	32
	.endef')')
147
148
149dnl  Usage: call_mcount
150dnl
151dnl  For `gprof' style profiling, %ebp is setup as a frame pointer.  None of
152dnl  the assembler routines use %ebp this way, so it's done only for the
153dnl  benefit of mcount.  glibc sysdeps/i386/i386-mcount.S shows how mcount
154dnl  gets the current function from (%esp) and the parent from 4(%ebp).
155dnl
156dnl  For `prof' style profiling gcc generates mcount calls without setting
157dnl  up %ebp, and the same is done here.
158
dnl  The expansion is, in order,
dnl
dnl  - a "W32 0" data word labelled L(mcount_data_N), but only when the
dnl    applicable MCOUNT_PIC_REG or MCOUNT_NONPIC_REG is non-empty
dnl  - for `gprof', a pushl %ebp and movl %esp,%ebp
dnl  - for PIC, %ebx saved and pointed at _GLOBAL_OFFSET_TABLE_, the data
dnl    word address put in MCOUNT_PIC_REG if that's non-empty, then
dnl    MCOUNT_PIC_CALL, and %ebx restored
dnl  - for non-PIC, the data word address put in MCOUNT_NONPIC_REG if that's
dnl    non-empty, then MCOUNT_NONPIC_CALL
dnl  - for `gprof', a popl %ebp
dnl
dnl  mcount_counter supplies the N in the labels and is incremented at the
dnl  end of each expansion, so every use gets its own labels.
define(`call_mcount',
m4_assert_numargs(-1)
m4_assert_defined(`WANT_PROFILING')
m4_assert_defined(`MCOUNT_PIC_REG')
m4_assert_defined(`MCOUNT_NONPIC_REG')
m4_assert_defined(`MCOUNT_PIC_CALL')
m4_assert_defined(`MCOUNT_NONPIC_CALL')
`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
`	DATA
	ALIGN(4)
L(mcount_data_`'mcount_counter):
	W32	0
	TEXT
')dnl
ifelse(WANT_PROFILING,`gprof',
`	pushl	%ebp
	movl	%esp, %ebp
')dnl
ifdef(`PIC',
`	pushl	%ebx
	call_movl_eip_to_ebx
L(mcount_here_`'mcount_counter):
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(mcount_here_`'mcount_counter)], %ebx
ifelse(MCOUNT_PIC_REG,,,
`	leal	L(mcount_data_`'mcount_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
MCOUNT_PIC_CALL
	popl	%ebx
',`dnl non-PIC
ifelse(MCOUNT_NONPIC_REG,,,
`	movl	`$'L(mcount_data_`'mcount_counter), MCOUNT_NONPIC_REG
')dnl
MCOUNT_NONPIC_CALL
')dnl
ifelse(WANT_PROFILING,`gprof',
`	popl	%ebp
')
define(`mcount_counter',incr(mcount_counter))
')

define(mcount_counter,1)
199
200
201dnl  Usage: call_instrument(enter|exit)
202dnl
203dnl  Call __cyg_profile_func_enter or __cyg_profile_func_exit.
204dnl
205dnl  For PIC, most routines don't require _GLOBAL_OFFSET_TABLE_ themselves
206dnl  so %ebx is just setup for these calls.  It's a bit wasteful to repeat
207dnl  the setup for the exit call having done it earlier for the enter, but
208dnl  there's nowhere very convenient to hold %ebx through the length of a
209dnl  routine, in general.
210dnl
211dnl  For PIC, because instrument_current_function will be within the current
212dnl  object file we can get it just as an offset from %eip, there's no need
213dnl  to use the GOT.
214dnl
215dnl  No attempt is made to maintain the stack alignment gcc generates with
216dnl  -mpreferred-stack-boundary.  This wouldn't be hard, but it seems highly
217dnl  unlikely the instrumenting functions would be doing anything that'd
218dnl  benefit from alignment, in particular they're unlikely to be using
219dnl  doubles or long doubles on the stack.
220dnl
221dnl  The FRAME scheme is used to conveniently account for the register saves
222dnl  before accessing the return address.  Any previous value is saved and
223dnl  restored, since plenty of code keeps a value across a "ret" in the
224dnl  middle of a routine.
225
dnl  FRAME is pushdef'ed to 0 at the start and popdef'ed at the end, and is
dnl  stepped with FRAME_pushl() at each pushl, so that
dnl  m4_empty_if_zero(FRAME)(%esp) addresses the word which was at (%esp)
dnl  when the expansion began, that being what's pushed as the return
dnl  address.  For `exit', %eax is saved and restored around the call.
dnl
dnl  instrument_count numbers the L(instrument_here_N) labels and is
dnl  incremented at the end of each expansion.
define(call_instrument,
m4_assert_numargs(1)
`	pushdef(`FRAME',0)
ifelse($1,exit,
`	pushl	%eax	FRAME_pushl()	C return value
')
ifdef(`PIC',
`	pushl	%ebx	FRAME_pushl()
	call_movl_eip_to_ebx
L(instrument_here_`'instrument_count):
	movl	%ebx, %ecx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(instrument_here_`'instrument_count)], %ebx
	C use addl rather than leal to avoid old gas bugs, see mpn/x86/README
	addl	$instrument_current_function-L(instrument_here_`'instrument_count), %ecx
	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
	pushl	%ecx				FRAME_pushl()	C this function
	call	GSYM_PREFIX`'__cyg_profile_func_$1@PLT
	addl	$`'8, %esp
	popl	%ebx
',
`	C non-PIC
	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
	pushl	$instrument_current_function	FRAME_pushl()	C this function
	call	GSYM_PREFIX`'__cyg_profile_func_$1
	addl	$`'8, %esp
')
ifelse($1,exit,
`	popl	%eax			C return value
')
	popdef(`FRAME')
define(`instrument_count',incr(instrument_count))
')
define(instrument_count,1)
259
260
261dnl  Usage: instrument_current_function
262dnl
263dnl  Return the current function name for instrumenting purposes.  This is
264dnl  PROLOGUE_current_function, but it sticks at the first such name seen.
265dnl
266dnl  Sticking to the first name seen ensures that multiple-entrypoint
267dnl  functions like mpn_add_nc and mpn_add_n will make enter and exit calls
268dnl  giving the same function address.
269
dnl  The first use records the expansion of PROLOGUE_current_function at that
dnl  point in instrument_current_function_seen, and every later use expands
dnl  to that recorded value.
define(instrument_current_function,
m4_assert_numargs(-1)
`ifdef(`instrument_current_function_seen',
`instrument_current_function_seen',
`define(`instrument_current_function_seen',PROLOGUE_current_function)dnl
PROLOGUE_current_function')')
276
277
278dnl  Usage: call_movl_eip_to_ebx
279dnl
280dnl  Generate a call to L(movl_eip_to_ebx), and record the need for that
281dnl  routine.
282
dnl  The need is recorded by setting movl_eip_to_ebx_needed to 1, which is
dnl  what generate_movl_eip_to_ebx tests.
define(call_movl_eip_to_ebx,
m4_assert_numargs(-1)
`call	L(movl_eip_to_ebx)
define(`movl_eip_to_ebx_needed',1)')
287
288dnl  Usage: generate_movl_eip_to_ebx
289dnl
290dnl  Emit a L(movl_eip_to_ebx) routine, if needed and not already generated.
291
dnl  The routine loads the word at (%esp), which is the return address left
dnl  by the call, into %ebx and returns with a plain ret_internal.  Once
dnl  emitted, movl_eip_to_ebx_done is set to 1 and later uses expand to
dnl  nothing.
define(generate_movl_eip_to_ebx,
m4_assert_numargs(-1)
`ifelse(movl_eip_to_ebx_needed,1,
`ifelse(movl_eip_to_ebx_done,1,,
`L(movl_eip_to_ebx):
	movl	(%esp), %ebx
	ret_internal
define(`movl_eip_to_ebx_done',1)
')')')
301
302
303dnl  Usage: ret
304dnl
305dnl  Generate a "ret", but if doing instrumented profiling then call
306dnl  __cyg_profile_func_exit first.
307
dnl  generate_movl_eip_to_ebx follows every return, so a L(movl_eip_to_ebx)
dnl  routine which has been asked for by call_movl_eip_to_ebx gets emitted
dnl  after the next ret reached.
define(ret,
m4_assert_numargs(-1)
m4_assert_defined(`WANT_PROFILING')
`ifelse(WANT_PROFILING,instrument,
`ret_instrument',
`ret_internal')
generate_movl_eip_to_ebx
')
316
317
318dnl  Usage: ret_internal
319dnl
320dnl  A plain "ret", without any __cyg_profile_func_exit call.  This can be
321dnl  used for a return which is internal to some function, such as when
322dnl  getting %eip for PIC.
323
dnl  The double quoting leaves `ret' quoted in the stored definition, so the
dnl  expansion is the literal text ret and not a further use of the ret macro.
define(ret_internal,
m4_assert_numargs(-1)
``ret'')
327
328
329dnl  Usage: ret_instrument
330dnl
331dnl  Generate call to __cyg_profile_func_exit and then a ret.  If a ret has
332dnl  already been seen from this function then jump to that chunk of code,
333dnl  rather than emitting it again.
334
dnl  ret_instrument_seen_<function> is set to 1 by the first use in a
dnl  function, which emits the L(instrument_exit_<function>) label,
dnl  call_instrument(exit) and a ret_internal.  Later uses find that flag and
dnl  emit only a jmp to the label.  <function> is the expansion of
dnl  instrument_current_function.
define(ret_instrument,
m4_assert_numargs(-1)
`ifelse(m4_unquote(ret_instrument_seen_`'instrument_current_function),1,
`jmp	L(instrument_exit_`'instrument_current_function)',
`define(ret_instrument_seen_`'instrument_current_function,1)
L(instrument_exit_`'instrument_current_function):
call_instrument(exit)
	ret_internal')')
343
344
345dnl  Usage: _GLOBAL_OFFSET_TABLE_
346dnl
347dnl  Expand to _GLOBAL_OFFSET_TABLE_ plus any necessary underscore prefix.
348dnl  This lets us write plain _GLOBAL_OFFSET_TABLE_ in SVR4 style, but still
349dnl  work with systems requiring an extra underscore such as OpenBSD.
350dnl
351dnl  deflit is used so "leal _GLOBAL_OFFSET_TABLE_(%eax), %ebx" will come
352dnl  out right, though that form doesn't work properly in gas (see
353dnl  mpn/x86/README).
354
dnl  GOT_GSYM_PREFIX must be defined.  The symbol name in the body is quoted
dnl  so that it's emitted as text and not expanded as this macro again.
deflit(_GLOBAL_OFFSET_TABLE_,
m4_assert_defined(`GOT_GSYM_PREFIX')
`GOT_GSYM_PREFIX`_GLOBAL_OFFSET_TABLE_'')
358
359
360dnl  --------------------------------------------------------------------------
361dnl  Various x86 macros.
362dnl
363
364
365dnl  Usage: ALIGN_OFFSET(bytes,offset)
366dnl
367dnl  Align to `offset' away from a multiple of `bytes'.
368dnl
369dnl  This is useful for testing, for example align to something very strict
370dnl  and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
371dnl
372dnl  Generally you wouldn't execute across the padding, but it's done with
373dnl  nop's so it'll work.
374
dnl  Expands to ALIGN(bytes) and then a forloop from 1 to `offset' emitting
dnl  one nop line per iteration.
define(ALIGN_OFFSET,
m4_assert_numargs(2)
`ALIGN($1)
forloop(`i',1,$2,`	nop
')')
380
381
382dnl  Usage: defframe(name,offset)
383dnl
384dnl  Make a definition like the following with which to access a parameter
385dnl  or variable on the stack.
386dnl
387dnl         define(name,`FRAME+offset(%esp)')
388dnl
389dnl  Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
390dnl  byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
391dnl  Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
392dnl  zero offset is wanted.
393dnl
394dnl  The new macro also gets a check that when it's used FRAME is actually
395dnl  defined, and that the final %esp offset isn't negative, which would
396dnl  mean an attempt to access something below the current %esp.
397dnl
398dnl  deflit() is used rather than a plain define(), so the new macro won't
399dnl  delete any following parenthesized expression.  name(%edi) will come
400dnl  out say as 16(%esp)(%edi).  This isn't valid assembler and should
401dnl  provoke an error, which is better than silently giving just 16(%esp).
402dnl
403dnl  See README for more on the suggested way to access the stack frame.
404
dnl  FRAME is referred to from within the quoted body given to deflit, so
dnl  each use of `name' picks up whatever value FRAME has at that point, and
dnl  the not-below-%esp check is likewise made at each use.
define(defframe,
m4_assert_numargs(2)
`deflit(`$1',
m4_assert_defined(`FRAME')
`defframe_check_notbelow(`$1',$2,FRAME)dnl
defframe_empty_if_zero(FRAME+($2))(%esp)')')

dnl  Called: defframe_empty_if_zero(expression)
dnl  Expand to eval(expression) if defframe_empty_if_zero_disabled is 1, or
dnl  to m4_empty_if_zero(expression) if not.
define(defframe_empty_if_zero,
m4_assert_numargs(1)
`ifelse(defframe_empty_if_zero_disabled,1,
`eval($1)',
`m4_empty_if_zero($1)')')

dnl  Called: defframe_check_notbelow(`name',offset,FRAME)
dnl  Generate an error if FRAME+offset is negative, otherwise expand to
dnl  nothing.
define(defframe_check_notbelow,
m4_assert_numargs(3)
`ifelse(eval(($3)+($2)<0),1,
`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
')')')
425
426
427dnl  Usage: FRAME_pushl()
428dnl         FRAME_popl()
429dnl         FRAME_addl_esp(n)
430dnl         FRAME_subl_esp(n)
431dnl
432dnl  Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
433dnl  %esp of n bytes.
434dnl
435dnl  Using these macros is completely optional.  Sometimes it makes more
436dnl  sense to put explicit deflit(`FRAME',N) forms, especially when there's
437dnl  jumps and different sequences of FRAME values need to be used in
438dnl  different places.
439
dnl  FRAME_subl_esp and FRAME_addl_esp do the actual adjusting, redefining
dnl  FRAME to its current value plus or minus n.  A pushl is a 4 byte
dnl  subtract from %esp and a popl is a 4 byte add to %esp, so FRAME_pushl
dnl  and FRAME_popl are just those two with n=4.

define(FRAME_subl_esp,
m4_assert_numargs(1)
m4_assert_defined(`FRAME')
`deflit(`FRAME',eval(FRAME+($1)))')

define(FRAME_addl_esp,
m4_assert_numargs(1)
m4_assert_defined(`FRAME')
`deflit(`FRAME',eval(FRAME-($1)))')

define(FRAME_pushl,
m4_assert_numargs(0)
m4_assert_defined(`FRAME')
`FRAME_subl_esp(4)')

define(FRAME_popl,
m4_assert_numargs(0)
m4_assert_defined(`FRAME')
`FRAME_addl_esp(4)')
459
460
461dnl  Usage: defframe_pushl(name)
462dnl
463dnl  Do a combination FRAME_pushl() and a defframe() to name the stack
464dnl  location just pushed.  This should come after a pushl instruction.
465dnl  Putting it on the same line works and avoids lengthening the code.  For
466dnl  example,
467dnl
468dnl         pushl   %eax     defframe_pushl(VAR_COUNTER)
469dnl
470dnl  Notice the defframe() is done with an unquoted -FRAME thus giving its
471dnl  current value without tracking future changes.
472
dnl  FRAME_pushl() is expanded first, so the -FRAME given to defframe is the
dnl  negative of the already stepped FRAME.  Immediately afterwards `name' is
dnl  therefore at offset zero from %esp.
define(defframe_pushl,
m4_assert_numargs(1)
`FRAME_pushl()defframe(`$1',-FRAME)')
476
477
478dnl  --------------------------------------------------------------------------
479dnl  Assembler instruction macros.
480dnl
481
482
483dnl  Usage: emms_or_femms
484dnl         femms_available_p
485dnl
486dnl  femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
487dnl  femms instruction is available.  emms_or_femms expands to femms if
488dnl  available, or emms if not.
489dnl
490dnl  emms_or_femms is meant for use in the K6 directory where plain K6
491dnl  (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
492dnl  supported together.
493dnl
494dnl  On K7 femms is no longer faster and is just an alias for emms, so plain
495dnl  emms may as well be used.
496
dnl  femms_available_p is m4_ifdef_anyof_p on the k62, k63 and athlon
dnl  HAVE_HOST_CPU symbols.
define(femms_available_p,
m4_assert_numargs(-1)
`m4_ifdef_anyof_p(
	`HAVE_HOST_CPU_k62',
	`HAVE_HOST_CPU_k63',
	`HAVE_HOST_CPU_athlon')')

dnl  The femms chosen here is the femms macro, which emits .byte.
define(emms_or_femms,
m4_assert_numargs(-1)
`ifelse(femms_available_p,1,`femms',`emms')')
507
508
509dnl  Usage: femms
510dnl
511dnl  Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the
512dnl  following is a replacement using .byte.
513
dnl  The bytes 15,14 are 0x0f,0x0e.
define(femms,
m4_assert_numargs(-1)
`.byte	15,14	C AMD 3DNow femms')
517
518
519dnl  Usage: jadcl0(op)
520dnl
521dnl  Generate a jnc/incl as a substitute for adcl $0,op.  Note this isn't an
522dnl  exact replacement, since it doesn't set the flags like adcl does.
523dnl
524dnl  This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
525dnl  mpn_sqr_basecase because on K6 an adcl is slow, the branch
526dnl  misprediction penalty is small, and the multiply algorithm used leads
527dnl  to a carry bit on average only 1/4 of the time.
528dnl
529dnl  jadcl0_disabled can be set to 1 to instead generate an ordinary adcl
530dnl  for comparison.  For example,
531dnl
532dnl		define(`jadcl0_disabled',1)
533dnl
534dnl  When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
535dnl  the same size as an adcl.  This makes it possible to use the exact same
536dnl  computed jump code when testing the relative speed of the two.
537
dnl  jadcl0_counter numbers the L(jadcl0_N) labels.  It's incremented only
dnl  when the jnc/incl form is generated, the adcl form needing no label.
define(jadcl0,
m4_assert_numargs(1)
`ifelse(jadcl0_disabled,1,
	`adcl	$`'0, $1',
	`jnc	L(jadcl0_`'jadcl0_counter)
	incl	$1
L(jadcl0_`'jadcl0_counter):
define(`jadcl0_counter',incr(jadcl0_counter))')')

define(jadcl0_counter,1)
548
549
550dnl  Usage: x86_lookup(target, key,value, key,value, ...)
551dnl         x86_lookup_p(target, key,value, key,value, ...)
552dnl
553dnl  Look for `target' among the `key' parameters.
554dnl
555dnl  x86_lookup expands to the corresponding `value', or generates an error
556dnl  if `target' isn't found.
557dnl
558dnl  x86_lookup_p expands to 1 if `target' is found, or 0 if not.
559
dnl  Both macros are a single three-way ifelse: fewer than 3 arguments left
dnl  means the keys are exhausted, otherwise the first key is compared with
dnl  `target', and failing that the search recurses with that key,value pair
dnl  shifted off.

define(x86_lookup,
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
`m4_error(`unrecognised part of x86 instruction: $1
')',
`$1',`$2',`$3',
`x86_lookup(`$1',shift(shift(shift($@))))')')

define(x86_lookup_p,
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,`0',
`$1',`$2',`1',
`x86_lookup_p(`$1',shift(shift(shift($@))))')')
573
574
575dnl  Usage: x86_opcode_reg32(reg)
576dnl         x86_opcode_reg32_p(reg)
577dnl
578dnl  x86_opcode_reg32 expands to the standard 3 bit encoding for the given
579dnl  32-bit register, eg. `%ebp' turns into 5.
580dnl
581dnl  x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
582dnl  if not.
583
dnl  An unrecognised register is an error from x86_lookup.
define(x86_opcode_reg32,
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_reg32_list)')

dnl  NOTE(review): m4_assert_onearg() is used here where x86_opcode_reg32 has
dnl  m4_assert_numargs(1), presumably so that an empty argument can be
dnl  tested; confirm against asm-defs.m4.
define(x86_opcode_reg32_p,
m4_assert_onearg()
`x86_lookup_p(`$1',x86_opcode_reg32_list)')

dnl  Register,encoding pairs in the key,value form taken by x86_lookup and
dnl  x86_lookup_p.
define(x86_opcode_reg32_list,
``%eax',0,
`%ecx',1,
`%edx',2,
`%ebx',3,
`%esp',4,
`%ebp',5,
`%esi',6,
`%edi',7')
601
602
dnl  Usage: x86_opcode_tttn(cond)
dnl
dnl  Expand to the 4-bit "tttn" field value for the given x86 branch
dnl  condition (like `c', `ae', etc).
dnl
dnl  The condition is looked up among the keys of x86_opcode_tttn_list, and
dnl  x86_lookup generates an error if it's not found there.

define(x86_opcode_tttn,
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_tttn_list)')
611
dnl  Condition,tttn pairs in the key,value form taken by x86_lookup.  The
dnl  same list is given to define_cmov_many to create the cmovCC macros, so
dnl  each key here also becomes a cmov<key> macro.
dnl
dnl  NOTE(review): `npo' and `npe' don't appear to be standard x86 condition
dnl  mnemonics (the parity pairs are p/pe and np/po); confirm whether those
dnl  two entries are intended.
define(x86_opcode_tttn_list,
``o',  0,
`no',  1,
`b',   2, `c',  2, `nae',2,
`nb',  3, `nc', 3, `ae', 3,
`e',   4, `z',  4,
`ne',  5, `nz', 5,
`be',  6, `na', 6,
`nbe', 7, `a',  7,
`s',   8,
`ns',  9,
`p',  10, `pe', 10, `npo',10,
`np', 11, `npe',11, `po', 11,
`l',  12, `nge',12,
`nl', 13, `ge', 13,
`le', 14, `ng', 14,
`nle',15, `g',  15')
629
630
631dnl  Usage: cmovCC(%srcreg,%dstreg)
632dnl
633dnl  Emit a cmov instruction, using a .byte sequence, since various past
634dnl  versions of gas don't know cmov.  For example,
635dnl
636dnl         cmovz(  %eax, %ebx)
637dnl
638dnl  The source operand can only be a plain register.  (m4 code implementing
639dnl  full memory addressing modes exists, believe it or not, but isn't
640dnl  currently needed and isn't included.)
641dnl
642dnl  All the standard conditions are defined.  Attempting to use one without
643dnl  the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
644dnl  an error.  This protects against writing something old gas wouldn't
645dnl  understand.
646
dnl  Called: define_cmov_many(cond,tttn,cond,tttn,...)
dnl  Apply define_cmov to each cond,tttn pair in turn, stopping when the next
dnl  cond is empty.
define(define_cmov_many,
`ifelse(m4_length(`$1'),0,,
`define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')

dnl  Called: define_cmov(cond,tttn)
dnl  Emit basically define(cmov<cond>,`cmov_internal(<cond>,<tttn>,`$1',`$2')')
dnl  The new macro also gets m4_instruction_wrapper() and a check for exactly
dnl  2 arguments.
define(define_cmov,
m4_assert_numargs(2)
`define(`cmov$1',
m4_instruction_wrapper()
m4_assert_numargs(2)
`cmov_internal'(m4_doublequote($`'0),``$2'',dnl
m4_doublequote($`'1),m4_doublequote($`'2)))')

define_cmov_many(x86_opcode_tttn_list)
663
dnl  Called: cmov_internal(name,tttn,src,dst)
dnl  The bytes emitted are 15 (0x0f), 64+tttn (0x40+tttn), and then
dnl  192+8*dst+src with the registers numbered by x86_opcode_reg32.  `name'
dnl  is used only in the trailing comment.
define(cmov_internal,
m4_assert_numargs(4)
`.byte	dnl
15, dnl
eval(64+$2), dnl
eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
	C `$1 $3, $4'')
672
673
674dnl  Usage: x86_opcode_regmmx(reg)
675dnl
676dnl  Validate the given mmx register, and return its number, 0 to 7.
677
dnl  An unrecognised register is an error from x86_lookup.
define(x86_opcode_regmmx,
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_regmmx_list)')

dnl  Register,number pairs in the key,value form taken by x86_lookup.
define(x86_opcode_regmmx_list,
``%mm0',0,
`%mm1',1,
`%mm2',2,
`%mm3',3,
`%mm4',4,
`%mm5',5,
`%mm6',6,
`%mm7',7')
691
692
693dnl  Usage: psadbw(%srcreg,%dstreg)
694dnl
695dnl  Oldish versions of gas don't know psadbw, in particular gas 2.9.1 on
696dnl  FreeBSD 3.3 and 3.4 doesn't, so instead emit .byte sequences.  For
697dnl  example,
698dnl
699dnl         psadbw( %mm1, %mm2)
700dnl
701dnl  Only register->register forms are supported here, which suffices for
702dnl  the current code.
703
dnl  The bytes emitted are 0x0f,0xf6 and then 192+8*dst+src, with the
dnl  registers numbered by x86_opcode_regmmx.
define(psadbw,
m4_instruction_wrapper()
m4_assert_numargs(2)
`.byte 0x0f,0xf6,dnl
eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
	C `psadbw $1, $2'')
710
711
712dnl  Usage: Zdisp(inst,op,op,op)
713dnl
714dnl  Generate explicit .byte sequences if necessary to force a byte-sized
715dnl  zero displacement on an instruction.  For example,
716dnl
717dnl         Zdisp(  movl,   0,(%esi), %eax)
718dnl
719dnl  expands to
720dnl
721dnl                 .byte   139,70,0  C movl 0(%esi), %eax
722dnl
723dnl  If the displacement given isn't 0, then normal assembler code is
724dnl  generated.  For example,
725dnl
726dnl         Zdisp(  movl,   4,(%esi), %eax)
727dnl
728dnl  expands to
729dnl
730dnl                 movl    4(%esi), %eax
731dnl
732dnl  This means a single Zdisp() form can be used with an expression for the
733dnl  displacement, and .byte will be used only if necessary.  The
734dnl  displacement argument is eval()ed.
735dnl
736dnl  Because there aren't many places a 0(reg) form is wanted, Zdisp is
737dnl  implemented with a table of instructions and encodings.  A new entry is
738dnl  needed for any different operation or registers.  The table is split
739dnl  into separate macros to avoid overflowing BSD m4 macro expansion space.
740
dnl  Zdisp_found is reset to 0, each of the four table macros is given the
dnl  caller's arguments, and an error is generated if no table entry has set
dnl  Zdisp_found to 1.
define(Zdisp,
m4_assert_numargs(4)
`define(`Zdisp_found',0)dnl
Zdisp_1($@)dnl
Zdisp_2($@)dnl
Zdisp_3($@)dnl
Zdisp_4($@)dnl
ifelse(Zdisp_found,0,
`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
')')')
751
dnl  Called: Zdisp_1(inst,op,op,op)  and likewise Zdisp_2, Zdisp_3, Zdisp_4
dnl
dnl  Each table entry is Zdisp_match(inst,op,op,op, `bytes', $@).  The 0
dnl  among the entry's operands marks where the displacement goes, and
dnl  `bytes' is the encoding of the instruction with an explicit zero byte
dnl  displacement.  The $@ passes on the arguments Zdisp was called with.
define(Zdisp_1,`dnl
Zdisp_match( adcl, 0,(%edx), %eax,        `0x13,0x42,0x00',           $@)`'dnl
Zdisp_match( adcl, 0,(%edx), %ebx,        `0x13,0x5a,0x00',           $@)`'dnl
Zdisp_match( adcl, 0,(%edx), %esi,        `0x13,0x72,0x00',           $@)`'dnl
Zdisp_match( addl, %ebx, 0,(%edi),        `0x01,0x5f,0x00',           $@)`'dnl
Zdisp_match( addl, %ecx, 0,(%edi),        `0x01,0x4f,0x00',           $@)`'dnl
Zdisp_match( addl, %esi, 0,(%edi),        `0x01,0x77,0x00',           $@)`'dnl
Zdisp_match( sbbl, 0,(%edx), %eax,        `0x1b,0x42,0x00',           $@)`'dnl
Zdisp_match( sbbl, 0,(%edx), %esi,        `0x1b,0x72,0x00',           $@)`'dnl
Zdisp_match( subl, %ecx, 0,(%edi),        `0x29,0x4f,0x00',           $@)`'dnl
Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl
Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl
Zdisp_match( adc, 0,(%ebx,%ecx,4), %eax,  `0x13,0x44,0x8b,0x00',      $@)`'dnl
Zdisp_match( sbb, 0,(%ebx,%ecx,4), %eax,  `0x1b,0x44,0x8b,0x00',      $@)`'dnl
')
define(Zdisp_2,`dnl
Zdisp_match( movl, %eax, 0,(%edi),        `0x89,0x47,0x00',           $@)`'dnl
Zdisp_match( movl, %ebx, 0,(%edi),        `0x89,0x5f,0x00',           $@)`'dnl
Zdisp_match( movl, %esi, 0,(%edi),        `0x89,0x77,0x00',           $@)`'dnl
Zdisp_match( movl, 0,(%ebx), %eax,        `0x8b,0x43,0x00',           $@)`'dnl
Zdisp_match( movl, 0,(%ebx), %esi,        `0x8b,0x73,0x00',           $@)`'dnl
Zdisp_match( movl, 0,(%edx), %eax,        `0x8b,0x42,0x00',           $@)`'dnl
Zdisp_match( movl, 0,(%esi), %eax,        `0x8b,0x46,0x00',           $@)`'dnl
Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00',      $@)`'dnl
Zdisp_match( mov, 0,(%esi,%ecx,4), %eax,  `0x8b,0x44,0x8e,0x00',      $@)`'dnl
Zdisp_match( mov, %eax, 0,(%edi,%ecx,4),  `0x89,0x44,0x8f,0x00',      $@)`'dnl
')
define(Zdisp_3,`dnl
Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl
Zdisp_match( movq, 0,(%edx), %mm0,        `0x0f,0x6f,0x42,0x00',      $@)`'dnl
Zdisp_match( movq, 0,(%esi), %mm0,        `0x0f,0x6f,0x46,0x00',      $@)`'dnl
Zdisp_match( movq, %mm0, 0,(%edi),        `0x0f,0x7f,0x47,0x00',      $@)`'dnl
Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
')
define(Zdisp_4,`dnl
Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl
Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl
')
800
dnl  Called: Zdisp_match(inst,op,op,op, `bytes', inst,op,op,op)
dnl
dnl  $1 to $4 are a table entry, $5 is its byte encoding, and $6 to $9 are
dnl  the arguments Zdisp was called with.
dnl
dnl  If the entry has its 0 in $2, then it matches when $1,$3,$4 equal
dnl  $6,$8,$9, the caller's displacement being $7.  Otherwise if the entry
dnl  has its 0 in $3, then it matches when $1,$2,$4 equal $6,$7,$9, the
dnl  caller's displacement being $8.
dnl
dnl  On a match Zdisp_found is set to 1, and the .byte form is emitted if the
dnl  caller's displacement evals to 0, or the plain instruction if not.
define(Zdisp_match,
m4_assert_numargs(9)
`ifelse(eval(m4_stringequal_p(`$1',`$6')
	&& m4_stringequal_p(`$2',0)
	&& m4_stringequal_p(`$3',`$8')
	&& m4_stringequal_p(`$4',`$9')),1,
`define(`Zdisp_found',1)dnl
ifelse(eval(`$7'),0,
`	.byte	$5  C `$1 0$3, $4'',
`	$6	$7$8, $9')',

`ifelse(eval(m4_stringequal_p(`$1',`$6')
	&& m4_stringequal_p(`$2',`$7')
	&& m4_stringequal_p(`$3',0)
	&& m4_stringequal_p(`$4',`$9')),1,
`define(`Zdisp_found',1)dnl
ifelse(eval(`$8'),0,
`	.byte	$5  C `$1 $2, 0$4'',
`	$6	$7, $8$9')')')')
820
821
822dnl  Usage: shldl(count,src,dst)
823dnl         shrdl(count,src,dst)
824dnl         shldw(count,src,dst)
825dnl         shrdw(count,src,dst)
826dnl
827dnl  Generate a double-shift instruction, possibly omitting a %cl count
828dnl  parameter if that's what the assembler requires, as indicated by
829dnl  WANT_SHLDL_CL in config.m4.  For example,
830dnl
831dnl         shldl(  %cl, %eax, %ebx)
832dnl
833dnl  turns into either
834dnl
835dnl         shldl   %cl, %eax, %ebx
836dnl  or
837dnl         shldl   %eax, %ebx
838dnl
839dnl  Immediate counts are always passed through unchanged.  For example,
840dnl
841dnl         shrdl(  $2, %esi, %edi)
842dnl  becomes
843dnl         shrdl   $2, %esi, %edi
844dnl
845dnl
846dnl  If you forget to use the macro form "shldl( ...)" and instead write
847dnl  just a plain "shldl ...", an error results.  This ensures the necessary
848dnl  variant treatment of %cl isn't accidentally bypassed.
849
dnl  Called: define_shd_instruction(name)
dnl  Define `name' as a 3 argument macro which passes its own name and its
dnl  arguments on to shd_instruction.  The new macro gets
dnl  m4_instruction_wrapper() too.
define(define_shd_instruction,
m4_assert_numargs(1)
`define($1,
m4_instruction_wrapper()
m4_assert_numargs(3)
`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
m4_doublequote($`'2),m4_doublequote($`'3)))')

dnl  Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
define_shd_instruction(shldl)
define_shd_instruction(shrdl)
define_shd_instruction(shldw)
define_shd_instruction(shrdw)
863
dnl  Called: shd_instruction(op,count,src,dst)
dnl  The count is dropped only when it's exactly %cl and WANT_SHLDL_CL is 0,
dnl  otherwise all three operands are emitted.
define(shd_instruction,
m4_assert_numargs(4)
m4_assert_defined(`WANT_SHLDL_CL')
`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
``$1'	`$3', `$4'',
``$1'	`$2', `$3', `$4'')')
871
872
873dnl  Usage: ASSERT([cond][,instructions])
874dnl
875dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
876dnl  flags condition to then be satisfied.  For example,
877dnl
878dnl         ASSERT(ne, `cmpl %eax, %ebx')
879dnl
880dnl  The instructions can be omitted to just assert a flags condition with
881dnl  no extra calculation.  For example,
882dnl
883dnl         ASSERT(nc)
884dnl
885dnl  When `instructions' is not empty, a pushf/popf is added to preserve the
886dnl  flags, but the instructions themselves must preserve any registers that
887dnl  matter.  FRAME is adjusted for the push and pop, so the instructions
888dnl  given can use defframe() stack variables.
889dnl
890dnl  The condition can be omitted to just output the given instructions when
891dnl  assertion checking is wanted.  In this case the pushf/popf is omitted.
892dnl  For example,
893dnl
894dnl         ASSERT(, `movl %eax, VAR_KEEPVAL')
895
dnl  Nothing is generated unless WANT_ASSERT is 1.  With a condition, the
dnl  code is a j<cond> over a ud2 to an L(ASSERT_okN) label, N being
dnl  ASSERT_counter, which is incremented for each such expansion.  The
dnl  FRAME adjustments for the pushf and popf are made only if FRAME is
dnl  defined.
define(ASSERT,
m4_assert_numargs_range(1,2)
m4_assert_defined(`WANT_ASSERT')
`ifelse(WANT_ASSERT,1,
`ifelse(`$1',,
	`$2',
	`C ASSERT
ifelse(`$2',,,`	pushf	ifdef(`FRAME',`FRAME_pushl()')')
	$2
	j`$1'	L(ASSERT_ok`'ASSERT_counter)
	ud2	C assertion failed
L(ASSERT_ok`'ASSERT_counter):
ifelse(`$2',,,`	popf	ifdef(`FRAME',`FRAME_popl()')')
define(`ASSERT_counter',incr(ASSERT_counter))')')')

define(ASSERT_counter,1)
912
913
914dnl  Usage: movl_text_address(label,register)
915dnl
916dnl  Get the address of a text segment label, using either a plain movl or a
917dnl  position-independent calculation, as necessary.  For example,
918dnl
dnl         movl_text_address(L(foo),%eax)
920dnl
921dnl  This macro is only meant for use in ASSERT()s or when testing, since
922dnl  the PIC sequence it generates will want to be done with a ret balancing
923dnl  the call on CPUs with return address branch prediction.
924dnl
925dnl  The addl generated here has a backward reference to the label, and so
926dnl  won't suffer from the two forwards references bug in old gas (described
927dnl  in mpn/x86/README).
928
dnl  For PIC the sequence is a call to the immediately following label, a
dnl  popl of the return address into `register', and an addl of the distance
dnl  from that label to `label'.  movl_text_address_counter numbers the
dnl  labels, and is incremented only in the PIC case.
define(movl_text_address,
m4_assert_numargs(2)
`ifdef(`PIC',
	`call	L(movl_text_address_`'movl_text_address_counter)
L(movl_text_address_`'movl_text_address_counter):
	popl	$2	C %eip
	addl	`$'$1-L(movl_text_address_`'movl_text_address_counter), $2
define(`movl_text_address_counter',incr(movl_text_address_counter))',
	`movl	`$'$1, $2')')

define(movl_text_address_counter,1)
940
941
942dnl  Usage: notl_or_xorl_GMP_NUMB_MASK(reg)
943dnl
dnl  Expand to either "notl `reg'" or "xorl $GMP_NUMB_MASK,`reg'" as
945dnl  appropriate for nails in use or not.
946
dnl  The notl form is used when GMP_NAIL_BITS is 0, and the xorl form with
dnl  $GMP_NUMB_MASK otherwise.
define(notl_or_xorl_GMP_NUMB_MASK,
m4_assert_numargs(1)
`ifelse(GMP_NAIL_BITS,0,
`notl	`$1'',
`xorl	$GMP_NUMB_MASK, `$1'')')
952
953
dnl  Usage: LEA(symbol,reg)
dnl  Usage: LEAL(symbol_local_to_file,reg)
956
dnl  Load the address of `symbol' into `reg'.
dnl
dnl  For PIC, a L(movl_eip_<reg>) routine fetching the return address into
dnl  `reg' is appended to load_eip, unless the register name is already found
dnl  in the load_eip text.  <reg> is the register name with its first
dnl  character (the %) removed by substr.  The code emitted is a call to that
dnl  routine, an addl of $_GLOBAL_OFFSET_TABLE_, and a load from
dnl  symbol@GOT(reg).
dnl
dnl  For non-PIC, it's just a movl of $symbol.
define(`LEA',
m4_assert_numargs(2)
`ifdef(`PIC',`dnl
ifelse(index(defn(`load_eip'), `$2'),-1,
`m4append(`load_eip',
`	TEXT
	ALIGN(16)
L(movl_eip_`'substr($2,1)):
	movl	(%esp), $2
	ret_internal
')')dnl
	call	L(movl_eip_`'substr($2,1))
	addl	$_GLOBAL_OFFSET_TABLE_, $2
	movl	$1@GOT($2), $2
',`
	movl	`$'$1, $2
')')
974
dnl  Load the address of the file-local `symbol' into `reg'.
dnl
dnl  For PIC, a L(movl_eip_<reg>) routine is appended to load_eip if the
dnl  register name isn't already found in the load_eip text, and the code
dnl  emitted is a call to that routine, an addl of $_GLOBAL_OFFSET_TABLE_,
dnl  and a leal of symbol@GOTOFF(reg), so no GOT entry is loaded.
dnl
dnl  For non-PIC, it's just a movl of $symbol.
define(`LEAL',
m4_assert_numargs(2)
`ifdef(`PIC',`dnl
ifelse(index(defn(`load_eip'), `$2'),-1,
`m4append(`load_eip',
`	TEXT
	ALIGN(16)
L(movl_eip_`'substr($2,1)):
	movl	(%esp), $2
	ret_internal
')')dnl
	call	L(movl_eip_`'substr($2,1))
	addl	$_GLOBAL_OFFSET_TABLE_, $2
	leal	$1@GOTOFF($2), $2
',`
	movl	`$'$1, $2
')')
992
dnl  Usage: ASM_END
dnl
dnl  Expand to load_eip, which is the text of the L(movl_eip_<reg>) routines
dnl  accumulated by LEA and LEAL.  load_eip starts out empty.

define(`ASM_END',`load_eip')

define(`load_eip', `')		dnl updated in LEA/LEAL
998
999
dnl  Usage: DEF_OBJECT(name[,align])
dnl
dnl  Switch to RODATA, do an ALIGN(align), and emit the label `name'.  With
dnl  only one argument the alignment used is 2.
define(`DEF_OBJECT',
m4_assert_numargs_range(1,2)
	`RODATA
	ALIGN(ifelse($#,1,2,$2))
$1:
')
1006
dnl  Usage: END_OBJECT(name)
dnl
dnl  Emit a SIZE for `name', the size being the current location minus the
dnl  `name' label.
define(`END_OBJECT',
m4_assert_numargs(1)
`	SIZE(`$1',.-`$1')')
1010
dnl  Usage: CALL(funcname)
dnl
dnl  Emit a call to funcname with GSYM_PREFIX prepended.  When PIC is
dnl  defined the target gets an @PLT suffix.

define(`CALL',
m4_assert_numargs(1)
`ifdef(`PIC',
  `call	GSYM_PREFIX`'$1@PLT',
  `call	GSYM_PREFIX`'$1')')
1019
dnl  PIC_WITH_EBX is defined (with an empty value) exactly when PIC is
dnl  defined at this point, and is undefined otherwise.
ifdef(`PIC',
`define(`PIC_WITH_EBX')',
`undefine(`PIC_WITH_EBX')')
1023
1024divert`'dnl
1025