1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2014
4  *
5  * Registers used in STG code.  Might or might not correspond to
6  * actual machine registers.
7  *
8  * Do not #include this file directly: #include "Rts.h" instead.
9  *
10  * To understand the structure of the RTS headers, see the wiki:
11  *   http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
12  *
13  * ---------------------------------------------------------------------------*/
14 
15 #pragma once
16 
17 /* This file is #included into Haskell code in the compiler: #defines
18  * only in here please.
19  */
20 
21 /*
22  * Undefine these as a precaution: some of them were found to be
23  * defined by system headers on ARM/Linux.
24  */
25 #undef REG_R1
26 #undef REG_R2
27 #undef REG_R3
28 #undef REG_R4
29 #undef REG_R5
30 #undef REG_R6
31 #undef REG_R7
32 #undef REG_R8
33 #undef REG_R9
34 #undef REG_R10
35 
36 /*
37  * Defining MACHREGS_NO_REGS to 1 causes no global registers to be used.
38  * MACHREGS_NO_REGS is typically controlled by NO_REGS, which is
39  * typically defined by GHC, via a command-line option passed to gcc,
40  * when the -funregisterised flag is given.
41  *
42  * NB. When MACHREGS_NO_REGS to 1, calling & return conventions may be
43  * different.  For example, all function arguments will be passed on
44  * the stack, and components of an unboxed tuple will be returned on
45  * the stack rather than in registers.
46  */
47 #if MACHREGS_NO_REGS == 1
48 
49 /* Nothing */
50 
51 #elif MACHREGS_NO_REGS == 0
52 
53 /* ----------------------------------------------------------------------------
54    Caller saves and callee-saves regs.
55 
56    Caller-saves regs have to be saved around C-calls made from STG
57    land, so this file defines CALLER_SAVES_<reg> for each <reg> that
58    is designated caller-saves in that machine's C calling convention.
59 
60    As it stands, the only registers that are ever marked caller saves
61    are the RX, FX, DX and USER registers; as a result, if you
62    decide to caller save a system register (e.g. SP, HP, etc), note that
63    this code path is completely untested! -- EZY
64    -------------------------------------------------------------------------- */
65 
66 /* -----------------------------------------------------------------------------
67    The x86 register mapping
68 
69    Ok, we've only got 6 general purpose registers, a frame pointer and a
70    stack pointer.  \tr{%eax} and \tr{%edx} are return values from C functions,
71    hence they get trashed across ccalls and are caller saves. \tr{%ebx},
72    \tr{%esi}, \tr{%edi}, \tr{%ebp} are all callee-saves.
73 
74    Reg     STG-Reg
75    ---------------
76    ebx     Base
77    ebp     Sp
78    esi     R1
79    edi     Hp
80 
81    Leaving SpLim out of the picture.
82    -------------------------------------------------------------------------- */
83 
84 #if defined(MACHREGS_i386)
85 
86 #define REG(x) __asm__("%" #x)
87 
88 #if !defined(not_doing_dynamic_linking)
89 #define REG_Base    ebx
90 #endif
91 #define REG_Sp      ebp
92 
93 #if !defined(STOLEN_X86_REGS)
94 #define STOLEN_X86_REGS 4
95 #endif
96 
97 #if STOLEN_X86_REGS >= 3
98 # define REG_R1     esi
99 #endif
100 
101 #if STOLEN_X86_REGS >= 4
102 # define REG_Hp     edi
103 #endif
104 #define REG_MachSp  esp
105 
106 #define REG_XMM1    xmm0
107 #define REG_XMM2    xmm1
108 #define REG_XMM3    xmm2
109 #define REG_XMM4    xmm3
110 
111 #define REG_YMM1    ymm0
112 #define REG_YMM2    ymm1
113 #define REG_YMM3    ymm2
114 #define REG_YMM4    ymm3
115 
116 #define REG_ZMM1    zmm0
117 #define REG_ZMM2    zmm1
118 #define REG_ZMM3    zmm2
119 #define REG_ZMM4    zmm3
120 
121 #define MAX_REAL_VANILLA_REG 1  /* always, since it defines the entry conv */
122 #define MAX_REAL_FLOAT_REG   0
123 #define MAX_REAL_DOUBLE_REG  0
124 #define MAX_REAL_LONG_REG    0
125 #define MAX_REAL_XMM_REG     4
126 #define MAX_REAL_YMM_REG     4
127 #define MAX_REAL_ZMM_REG     4
128 
129 /* -----------------------------------------------------------------------------
130   The x86-64 register mapping
131 
132   %rax          caller-saves, don't steal this one
133   %rbx          YES
134   %rcx          arg reg, caller-saves
135   %rdx          arg reg, caller-saves
136   %rsi          arg reg, caller-saves
137   %rdi          arg reg, caller-saves
138   %rbp          YES (our *prime* register)
139   %rsp          (unavailable - stack pointer)
140   %r8           arg reg, caller-saves
141   %r9           arg reg, caller-saves
142   %r10          caller-saves
143   %r11          caller-saves
144   %r12          YES
145   %r13          YES
146   %r14          YES
147   %r15          YES
148 
149   %xmm0-7       arg regs, caller-saves
150   %xmm8-15      caller-saves
151 
152   Use the caller-saves regs for Rn, because we don't always have to
153   save those (as opposed to Sp/Hp/SpLim etc. which always have to be
154   saved).
155 
156   --------------------------------------------------------------------------- */
157 
158 #elif defined(MACHREGS_x86_64)
159 
160 #define REG(x) __asm__("%" #x)
161 
162 #define REG_Base  r13
163 #define REG_Sp    rbp
164 #define REG_Hp    r12
165 #define REG_R1    rbx
166 #define REG_R2    r14
167 #define REG_R3    rsi
168 #define REG_R4    rdi
169 #define REG_R5    r8
170 #define REG_R6    r9
171 #define REG_SpLim r15
172 #define REG_MachSp  rsp
173 
174 /*
175 Map both Fn and Dn to register xmmn so that we can pass a function any
176 combination of up to six Float# or Double# arguments without touching
177 the stack. See Note [Overlapping global registers] for implications.
178 */
179 
180 #define REG_F1    xmm1
181 #define REG_F2    xmm2
182 #define REG_F3    xmm3
183 #define REG_F4    xmm4
184 #define REG_F5    xmm5
185 #define REG_F6    xmm6
186 
187 #define REG_D1    xmm1
188 #define REG_D2    xmm2
189 #define REG_D3    xmm3
190 #define REG_D4    xmm4
191 #define REG_D5    xmm5
192 #define REG_D6    xmm6
193 
194 #define REG_XMM1    xmm1
195 #define REG_XMM2    xmm2
196 #define REG_XMM3    xmm3
197 #define REG_XMM4    xmm4
198 #define REG_XMM5    xmm5
199 #define REG_XMM6    xmm6
200 
201 #define REG_YMM1    ymm1
202 #define REG_YMM2    ymm2
203 #define REG_YMM3    ymm3
204 #define REG_YMM4    ymm4
205 #define REG_YMM5    ymm5
206 #define REG_YMM6    ymm6
207 
208 #define REG_ZMM1    zmm1
209 #define REG_ZMM2    zmm2
210 #define REG_ZMM3    zmm3
211 #define REG_ZMM4    zmm4
212 #define REG_ZMM5    zmm5
213 #define REG_ZMM6    zmm6
214 
215 #if !defined(mingw32_HOST_OS)
216 #define CALLER_SAVES_R3
217 #define CALLER_SAVES_R4
218 #endif
219 #define CALLER_SAVES_R5
220 #define CALLER_SAVES_R6
221 
222 #define CALLER_SAVES_F1
223 #define CALLER_SAVES_F2
224 #define CALLER_SAVES_F3
225 #define CALLER_SAVES_F4
226 #define CALLER_SAVES_F5
227 #if !defined(mingw32_HOST_OS)
228 #define CALLER_SAVES_F6
229 #endif
230 
231 #define CALLER_SAVES_D1
232 #define CALLER_SAVES_D2
233 #define CALLER_SAVES_D3
234 #define CALLER_SAVES_D4
235 #define CALLER_SAVES_D5
236 #if !defined(mingw32_HOST_OS)
237 #define CALLER_SAVES_D6
238 #endif
239 
240 #define CALLER_SAVES_XMM1
241 #define CALLER_SAVES_XMM2
242 #define CALLER_SAVES_XMM3
243 #define CALLER_SAVES_XMM4
244 #define CALLER_SAVES_XMM5
245 #if !defined(mingw32_HOST_OS)
246 #define CALLER_SAVES_XMM6
247 #endif
248 
249 #define CALLER_SAVES_YMM1
250 #define CALLER_SAVES_YMM2
251 #define CALLER_SAVES_YMM3
252 #define CALLER_SAVES_YMM4
253 #define CALLER_SAVES_YMM5
254 #if !defined(mingw32_HOST_OS)
255 #define CALLER_SAVES_YMM6
256 #endif
257 
258 #define CALLER_SAVES_ZMM1
259 #define CALLER_SAVES_ZMM2
260 #define CALLER_SAVES_ZMM3
261 #define CALLER_SAVES_ZMM4
262 #define CALLER_SAVES_ZMM5
263 #if !defined(mingw32_HOST_OS)
264 #define CALLER_SAVES_ZMM6
265 #endif
266 
267 #define MAX_REAL_VANILLA_REG 6
268 #define MAX_REAL_FLOAT_REG   6
269 #define MAX_REAL_DOUBLE_REG  6
270 #define MAX_REAL_LONG_REG    0
271 #define MAX_REAL_XMM_REG     6
272 #define MAX_REAL_YMM_REG     6
273 #define MAX_REAL_ZMM_REG     6
274 
275 /* -----------------------------------------------------------------------------
276    The PowerPC register mapping
277 
278    0            system glue?    (caller-save, volatile)
279    1            SP              (callee-save, non-volatile)
280    2            AIX, powerpc64-linux:
281                     RTOC        (a strange special case)
282                 powerpc32-linux:
283                                 reserved for use by system
284 
285    3-10         args/return     (caller-save, volatile)
286    11,12        system glue?    (caller-save, volatile)
287    13           on 64-bit:      reserved for thread state pointer
288                 on 32-bit:      (callee-save, non-volatile)
289    14-31                        (callee-save, non-volatile)
290 
291    f0                           (caller-save, volatile)
292    f1-f13       args/return     (caller-save, volatile)
293    f14-f31                      (callee-save, non-volatile)
294 
295    \tr{14}--\tr{31} are wonderful callee-save registers on all ppc OSes.
296    \tr{0}--\tr{12} are caller-save registers.
297 
298    \tr{%f14}--\tr{%f31} are callee-save floating-point registers.
299 
300    We can do the Whole Business with callee-save registers only!
301    -------------------------------------------------------------------------- */
302 
303 #elif defined(MACHREGS_powerpc)
304 
305 #define REG(x) __asm__(#x)
306 
307 #define REG_R1          r14
308 #define REG_R2          r15
309 #define REG_R3          r16
310 #define REG_R4          r17
311 #define REG_R5          r18
312 #define REG_R6          r19
313 #define REG_R7          r20
314 #define REG_R8          r21
315 
316 #define REG_F1          fr14
317 #define REG_F2          fr15
318 #define REG_F3          fr16
319 #define REG_F4          fr17
320 #define REG_F5          fr18
321 #define REG_F6          fr19
322 
323 #define REG_D1          fr20
324 #define REG_D2          fr21
325 #define REG_D3          fr22
326 #define REG_D4          fr23
327 #define REG_D5          fr24
328 #define REG_D6          fr25
329 
330 #define REG_Sp          r22
331 #define REG_SpLim       r24
332 
333 #define REG_Hp          r25
334 
335 #define REG_Base        r27
336 
337 /* -----------------------------------------------------------------------------
338    The Sun SPARC register mapping
339 
340    !! IMPORTANT: if you change this register mapping you must also update
341                  compiler/nativeGen/SPARC/Regs.hs. That file handles the
342                  mapping for the NCG. This one only affects via-c code.
343 
344    The SPARC register (window) story: Remember, within the Haskell
345    Threaded World, we essentially ``shut down'' the register-window
346    mechanism---the window doesn't move at all while in this World.  It
347    *does* move, of course, if we call out to arbitrary~C...
348 
349    The %i, %l, and %o registers (8 each) are the input, local, and
350    output registers visible in one register window.  The 8 %g (global)
351    registers are visible all the time.
352 
353       zero: always zero
354    scratch: volatile across C-fn calls. used by linker.
355        app: usable by application
356     system: reserved for system
357 
358      alloc: allocated to in the register allocator, intra-closure only
359 
360                 GHC usage     v8 ABI        v9 ABI
361    Global
362      %g0        zero        zero          zero
363      %g1        alloc       scratch       scrach
364      %g2        alloc       app           app
365      %g3        alloc       app           app
366      %g4        alloc       app           scratch
367      %g5                    system        scratch
368      %g6                    system        system
369      %g7                    system        system
370 
371    Output: can be zapped by callee
372      %o0-o5     alloc       caller saves
373      %o6                    C stack ptr
374      %o7                    C ret addr
375 
376    Local: maintained by register windowing mechanism
377      %l0        alloc
378      %l1        R1
379      %l2        R2
380      %l3        R3
381      %l4        R4
382      %l5        R5
383      %l6        alloc
384      %l7        alloc
385 
386    Input
387      %i0        Sp
388      %i1        Base
389      %i2        SpLim
390      %i3        Hp
391      %i4        alloc
392      %i5        R6
393      %i6                    C frame ptr
394      %i7                    C ret addr
395 
396    The paired nature of the floating point registers causes complications for
397    the native code generator.  For convenience, we pretend that the first 22
398    fp regs %f0 .. %f21 are actually 11 double regs, and the remaining 10 are
399    float (single) regs.  The NCG acts accordingly.  That means that the
400    following FP assignment is rather fragile, and should only be changed
401    with extreme care.  The current scheme is:
402 
403       %f0 /%f1    FP return from C
404       %f2 /%f3    D1
405       %f4 /%f5    D2
406       %f6 /%f7    ncg double spill tmp #1
407       %f8 /%f9    ncg double spill tmp #2
408       %f10/%f11   allocatable
409       %f12/%f13   allocatable
410       %f14/%f15   allocatable
411       %f16/%f17   allocatable
412       %f18/%f19   allocatable
413       %f20/%f21   allocatable
414 
415       %f22        F1
416       %f23        F2
417       %f24        F3
418       %f25        F4
419       %f26        ncg single spill tmp #1
420       %f27        ncg single spill tmp #2
421       %f28        allocatable
422       %f29        allocatable
423       %f30        allocatable
424       %f31        allocatable
425 
426    -------------------------------------------------------------------------- */
427 
428 #elif defined(MACHREGS_sparc)
429 
430 #define REG(x) __asm__("%" #x)
431 
432 #define CALLER_SAVES_USER
433 
434 #define CALLER_SAVES_F1
435 #define CALLER_SAVES_F2
436 #define CALLER_SAVES_F3
437 #define CALLER_SAVES_F4
438 #define CALLER_SAVES_D1
439 #define CALLER_SAVES_D2
440 
441 #define REG_R1          l1
442 #define REG_R2          l2
443 #define REG_R3          l3
444 #define REG_R4          l4
445 #define REG_R5          l5
446 #define REG_R6          i5
447 
448 #define REG_F1          f22
449 #define REG_F2          f23
450 #define REG_F3          f24
451 #define REG_F4          f25
452 
453 /* for each of the double arg regs,
454    Dn_2 is the high half. */
455 
456 #define REG_D1          f2
457 #define REG_D1_2        f3
458 
459 #define REG_D2          f4
460 #define REG_D2_2        f5
461 
462 #define REG_Sp          i0
463 #define REG_SpLim       i2
464 
465 #define REG_Hp          i3
466 
467 #define REG_Base        i1
468 
469 #define NCG_FirstFloatReg f22
470 
471 /* -----------------------------------------------------------------------------
472    The ARM EABI register mapping
473 
474    Here we consider ARM mode (i.e. 32bit isns)
475    and also CPU with full VFPv3 implementation
476 
477    ARM registers (see Chapter 5.1 in ARM IHI 0042D and
478    Section 9.2.2 in ARM Software Development Toolkit Reference Guide)
479 
480    r15  PC         The Program Counter.
481    r14  LR         The Link Register.
482    r13  SP         The Stack Pointer.
483    r12  IP         The Intra-Procedure-call scratch register.
484    r11  v8/fp      Variable-register 8.
485    r10  v7/sl      Variable-register 7.
486    r9   v6/SB/TR   Platform register. The meaning of this register is
487                    defined by the platform standard.
488    r8   v5         Variable-register 5.
489    r7   v4         Variable register 4.
490    r6   v3         Variable register 3.
491    r5   v2         Variable register 2.
492    r4   v1         Variable register 1.
493    r3   a4         Argument / scratch register 4.
494    r2   a3         Argument / scratch register 3.
495    r1   a2         Argument / result / scratch register 2.
496    r0   a1         Argument / result / scratch register 1.
497 
498    VFPv2/VFPv3/NEON registers
499    s0-s15/d0-d7/q0-q3    Argument / result/ scratch registers
500    s16-s31/d8-d15/q4-q7  callee-saved registers (must be preserved across
501                          subroutine calls)
502 
503    VFPv3/NEON registers (added to the VFPv2 registers set)
504    d16-d31/q8-q15        Argument / result/ scratch registers
505    ----------------------------------------------------------------------------- */
506 
507 #elif defined(MACHREGS_arm)
508 
509 #define REG(x) __asm__(#x)
510 
511 #define REG_Base        r4
512 #define REG_Sp          r5
513 #define REG_Hp          r6
514 #define REG_R1          r7
515 #define REG_R2          r8
516 #define REG_R3          r9
517 #define REG_R4          r10
518 #define REG_SpLim       r11
519 
520 #if !defined(arm_HOST_ARCH_PRE_ARMv6)
521 /* d8 */
522 #define REG_F1    s16
523 #define REG_F2    s17
524 /* d9 */
525 #define REG_F3    s18
526 #define REG_F4    s19
527 
528 #define REG_D1    d10
529 #define REG_D2    d11
530 #endif
531 
532 /* -----------------------------------------------------------------------------
533    The ARMv8/AArch64 ABI register mapping
534 
535    The AArch64 provides 31 64-bit general purpose registers
536    and 32 128-bit SIMD/floating point registers.
537 
538    General purpose registers (see Chapter 5.1.1 in ARM IHI 0055B)
539 
540    Register | Special | Role in the procedure call standard
541    ---------+---------+------------------------------------
542      SP     |         | The Stack Pointer
543      r30    |  LR     | The Link Register
544      r29    |  FP     | The Frame Pointer
545    r19-r28  |         | Callee-saved registers
546      r18    |         | The Platform Register, if needed;
547             |         | or temporary register
548      r17    |  IP1    | The second intra-procedure-call temporary register
549      r16    |  IP0    | The first intra-procedure-call scratch register
550     r9-r15  |         | Temporary registers
551      r8     |         | Indirect result location register
552     r0-r7   |         | Parameter/result registers
553 
554 
555    FPU/SIMD registers
556 
557    s/d/q/v0-v7    Argument / result/ scratch registers
558    s/d/q/v8-v15   callee-saved registers (must be preserved across subroutine calls,
559                   but only bottom 64-bit value needs to be preserved)
560    s/d/q/v16-v31  temporary registers
561 
562    ----------------------------------------------------------------------------- */
563 
564 #elif defined(MACHREGS_aarch64)
565 
566 #define REG(x) __asm__(#x)
567 
568 #define REG_Base        r19
569 #define REG_Sp          r20
570 #define REG_Hp          r21
571 #define REG_R1          r22
572 #define REG_R2          r23
573 #define REG_R3          r24
574 #define REG_R4          r25
575 #define REG_R5          r26
576 #define REG_R6          r27
577 #define REG_SpLim       r28
578 
579 #define REG_F1          s8
580 #define REG_F2          s9
581 #define REG_F3          s10
582 #define REG_F4          s11
583 
584 #define REG_D1          d12
585 #define REG_D2          d13
586 #define REG_D3          d14
587 #define REG_D4          d15
588 
589 #else
590 
591 #error Cannot find platform to give register info for
592 
593 #endif
594 
595 #else
596 
597 #error Bad MACHREGS_NO_REGS value
598 
599 #endif
600 
601 /* -----------------------------------------------------------------------------
602  * These constants define how many stg registers will be used for
603  * passing arguments (and results, in the case of an unboxed-tuple
604  * return).
605  *
606  * We usually set MAX_REAL_VANILLA_REG and co. to be the number of the
607  * highest STG register to occupy a real machine register, otherwise
608  * the calling conventions will needlessly shuffle data between the
609  * stack and memory-resident STG registers.  We might occasionally
610  * set these macros to other values for testing, though.
611  *
612  * Registers above these values might still be used, for instance to
613  * communicate with PrimOps and RTS functions.
614  */
615 
616 #if !defined(MAX_REAL_VANILLA_REG)
617 #  if   defined(REG_R10)
618 #  define MAX_REAL_VANILLA_REG 10
619 #  elif   defined(REG_R9)
620 #  define MAX_REAL_VANILLA_REG 9
621 #  elif   defined(REG_R8)
622 #  define MAX_REAL_VANILLA_REG 8
623 #  elif defined(REG_R7)
624 #  define MAX_REAL_VANILLA_REG 7
625 #  elif defined(REG_R6)
626 #  define MAX_REAL_VANILLA_REG 6
627 #  elif defined(REG_R5)
628 #  define MAX_REAL_VANILLA_REG 5
629 #  elif defined(REG_R4)
630 #  define MAX_REAL_VANILLA_REG 4
631 #  elif defined(REG_R3)
632 #  define MAX_REAL_VANILLA_REG 3
633 #  elif defined(REG_R2)
634 #  define MAX_REAL_VANILLA_REG 2
635 #  elif defined(REG_R1)
636 #  define MAX_REAL_VANILLA_REG 1
637 #  else
638 #  define MAX_REAL_VANILLA_REG 0
639 #  endif
640 #endif
641 
642 #if !defined(MAX_REAL_FLOAT_REG)
643 #  if   defined(REG_F4)
644 #  define MAX_REAL_FLOAT_REG 4
645 #  elif defined(REG_F3)
646 #  define MAX_REAL_FLOAT_REG 3
647 #  elif defined(REG_F2)
648 #  define MAX_REAL_FLOAT_REG 2
649 #  elif defined(REG_F1)
650 #  define MAX_REAL_FLOAT_REG 1
651 #  else
652 #  define MAX_REAL_FLOAT_REG 0
653 #  endif
654 #endif
655 
656 #if !defined(MAX_REAL_DOUBLE_REG)
657 #  if   defined(REG_D2)
658 #  define MAX_REAL_DOUBLE_REG 2
659 #  elif defined(REG_D1)
660 #  define MAX_REAL_DOUBLE_REG 1
661 #  else
662 #  define MAX_REAL_DOUBLE_REG 0
663 #  endif
664 #endif
665 
666 #if !defined(MAX_REAL_LONG_REG)
667 #  if   defined(REG_L1)
668 #  define MAX_REAL_LONG_REG 1
669 #  else
670 #  define MAX_REAL_LONG_REG 0
671 #  endif
672 #endif
673 
674 #if !defined(MAX_REAL_XMM_REG)
675 #  if   defined(REG_XMM6)
676 #  define MAX_REAL_XMM_REG 6
677 #  elif defined(REG_XMM5)
678 #  define MAX_REAL_XMM_REG 5
679 #  elif defined(REG_XMM4)
680 #  define MAX_REAL_XMM_REG 4
681 #  elif defined(REG_XMM3)
682 #  define MAX_REAL_XMM_REG 3
683 #  elif defined(REG_XMM2)
684 #  define MAX_REAL_XMM_REG 2
685 #  elif defined(REG_XMM1)
686 #  define MAX_REAL_XMM_REG 1
687 #  else
688 #  define MAX_REAL_XMM_REG 0
689 #  endif
690 #endif
691 
692 /* define NO_ARG_REGS if we have no argument registers at all (we can
693  * optimise certain code paths using this predicate).
694  */
695 #if MAX_REAL_VANILLA_REG < 2
696 #define NO_ARG_REGS
697 #else
698 #undef NO_ARG_REGS
699 #endif
700