
/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-s390x-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright IBM Corp. 2010-2017
  Copyright (C) 2011-2017, Florian Krohm (britzel@acm.org)

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/

/* Contributed by Florian Krohm and Christian Borntraeger */

#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"
#include "libvex_s390x_common.h"

/* All code in this file is assembled only when building for s390x;
   the matching #endif is near the end of the file. */
#if defined(VGA_s390x)

/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
/*--- used to run all translations,                        ---*/
/*--- including no-redir ones.                             ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Convenience definitions for readability.

   SP and LR are short aliases for the stack pointer and the link
   register.  Any earlier definition of either name is dropped first so
   that the aliases below are the ones in effect.  The S390_REGNO_*
   macros are not defined in this file (presumably they come from
   libvex_s390x_common.h -- TODO confirm). */
#undef  SP
#define SP S390_REGNO_STACK_POINTER

#undef  LR
#define LR S390_REGNO_LINK_REGISTER

/* Location of valgrind's saved FPC register: a stack slot addressed
   relative to SP.  The preamble stores the FPC there (stfpc) and the
   postamble reloads it (lfpc).  The offset macro is defined outside
   this file. */
#define S390_LOC_SAVED_FPC_V S390_OFFSET_SAVED_FPC_V(SP)

/* Location of saved R2 register: a stack slot addressed relative to
   SP.  It holds the two_words pointer while translations run.  The
   offset macro is defined outside this file. */
#define S390_LOC_SAVED_R2 S390_OFFSET_SAVED_R2(SP)


/*----------------------------------------------------*/
/*--- Entry and preamble (set everything up)       ---*/
/*----------------------------------------------------*/

/* signature:
void VG_(disp_run_translations)( UWord* two_words,
                                 void*  guest_state,
                                 Addr   host_addr );

        Return results are placed in two_words:

        two_words[0] is set to the TRC
        two_words[1] is set to the address to patch (in case two_words[0] is
                     VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP). Otherwise, it is 0.

        Register roles on entry, as used by the code below:

        r2   two_words    (spilled to S390_LOC_SAVED_R2)
        r3   guest_state  (copied into r13, the guest state register)
        r4   host_addr    (target of the final branch)

        The preamble does not return by itself: it ends by branching to
        host_addr.  The results are written to two_words, and control
        goes back to the C caller, at the `postamble' label further down.
*/
        .text
        .align   4
        .globl   VG_(disp_run_translations)
        .type    VG_(disp_run_translations), @function
VG_(disp_run_translations):

        /* r2 holds two_words */
        /* r3 holds pointer to guest_state */
        /* r4 holds host_addr, i.e. the address of the translation to run */

        /* Save gprs   ABI: r6...r13 and r15.
           The store covers the whole range r6..r15, so r14 (the link
           register) is saved as well.  It is done before SP is lowered,
           i.e. the slots are at 48(SP) relative to the incoming SP. */
        stmg  %r6,%r15,48(SP)

        /* New stack frame */
        aghi  SP,-S390_INNERLOOP_FRAME_SIZE

        /* Save fprs:   ABI: f8...f15
           Eight consecutive 8-byte slots starting at 160(SP) in the new
           frame. */
        std   %f8,160+0(SP)
        std   %f9,160+8(SP)
        std   %f10,160+16(SP)
        std   %f11,160+24(SP)
        std   %f12,160+32(SP)
        std   %f13,160+40(SP)
        std   %f14,160+48(SP)
        std   %f15,160+56(SP)

        /* Load address of guest state into guest state register (r13) */
        lgr   %r13,%r3

        /* Save R2 on stack. In the postamble it will be restored so that
           the return values can be written through it. */
        stg   %r2,S390_LOC_SAVED_R2

        /* Save valgrind's FPC on stack so the postamble can restore
           it later. */
        stfpc S390_LOC_SAVED_FPC_V

        /* Load the FPC the way the client code wants it. I.e. pull the
           value from the guest state. */
        lfpc  OFFSET_s390x_fpc(%r13)

        /* Jump into the code cache.  Chained translations in
           the code cache run, until for whatever reason, they can't
           continue.  When that happens, the translation in question
           will jump (or call) to one of the continuation points
           VG_(disp_cp_...) below. */
        br    %r4


/*----------------------------------------------------*/
/*--- Postamble and return to C code.              ---*/
/*----------------------------------------------------*/

/* Common exit path.  The continuation points in this file that leave
   the code cache set up %r0/%r1 and then branch here.  This code undoes
   the preamble of VG_(disp_run_translations) in reverse order and
   returns to its caller. */
postamble:
        /* At this point, %r0 and %r1 contain two
           words to be returned to the caller.  %r0
           holds a TRC value, and %r1 optionally may
           hold another word (for CHAIN_ME exits, the
           address of the place to patch.) */

        /* We're leaving. AMD has some code here to check invariants.
           We don't have (need) that, as we save and restore the FPC register
           whenever we switch between valgrind proper and client code. */

        /* Restore valgrind's FPC, as client code may have changed it. */
        lfpc S390_LOC_SAVED_FPC_V

        /* Restore %r2 from stack; holds address of two_words */
        lg   %r2,S390_LOC_SAVED_R2

        stg  %r0,0(%r2)         /* Store %r0 to two_words[0] */
        stg  %r1,8(%r2)         /* Store %r1 to two_words[1] */

        /* Restore callee-saved registers... */

        /* Floating-point regs: same slots the preamble stored them to */
        ld   %f8,160+0(SP)
        ld   %f9,160+8(SP)
        ld   %f10,160+16(SP)
        ld   %f11,160+24(SP)
        ld   %f12,160+32(SP)
        ld   %f13,160+40(SP)
        ld   %f14,160+48(SP)
        ld   %f15,160+56(SP)

        /* Remove stack frame.  After this SP has the value it had on
           entry, so 48(SP) below addresses the slots written by the
           preamble's stmg. */
        aghi SP,S390_INNERLOOP_FRAME_SIZE

        /* General-purpose regs. This also restores the original link
           register (r14) and stack pointer (r15). */
        lmg  %r6,%r15,48(SP)

        /* Return */
        br   LR


/*----------------------------------------------------*/
/*--- Continuation points                          ---*/
/*----------------------------------------------------*/

/* ------ Chain me to slow entry point ------ */
        .global VG_(disp_cp_chain_me_to_slowEP)
VG_(disp_cp_chain_me_to_slowEP):
        /* When we come here %r1 contains the address of the place to patch.
           The return values (TRC, address-to-patch) are handed to the
           postamble in %r0 and %r1, respectively.  %r1 is left as it
           is; only the TRC has to be loaded. */
        lghi    %r0,VG_TRC_CHAIN_ME_TO_SLOW_EP
        j       postamble


/* ------ Chain me to fast entry point ------ */
        .global VG_(disp_cp_chain_me_to_fastEP)
VG_(disp_cp_chain_me_to_fastEP):
        /* Identical to VG_(disp_cp_chain_me_to_slowEP), except value of %r0:
           %r1 is passed through to the postamble unchanged and %r0
           gets the TRC. */
        lghi    %r0,VG_TRC_CHAIN_ME_TO_FAST_EP
        j       postamble


/* ------ Indirect but boring jump ------ */
        .global VG_(disp_cp_xindir)
VG_(disp_cp_xindir):
        /* Looks up the next guest address in the fast translation cache
           VG_(tt_fast).  On a hit, control goes straight to the cached
           host code; on a miss, we leave through the postamble with
           VG_TRC_INNER_FASTMISS.

           Register use in this block:
             r2    next guest address
             r5    hash mask, then the masked address
             r7    byte offset of the selected VG_(tt_fast) entry
             r8    address of a stats counter, later of VG_(tt_fast)
             r10   scratch for the counter increments
             r11   .host field of the selected entry
             r13   guest state pointer (read only here) */

        /* Where are we going? */
        lg      %r2, OFFSET_s390x_IA(%r13)

        /* Increment VG_(stats__n_xindirs_32)
           (32-bit load / add / store) */
        larl    %r8, VG_(stats__n_xindirs_32)
        l       %r10,0(%r8)
        ahi     %r10,1
        st      %r10,0(%r8)

        /* Try a fast lookup in the translation cache:
           Compute offset (not index) into VG_(tt_fast):

           offset = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)

           with VG_TT_FAST_HASH(addr) == (addr >> 1) & VG_TT_FAST_MASK
           and  sizeof(FastCacheEntry) == 16

           offset = ((addr >> 1) & VG_TT_FAST_MASK) << 4
           which is
           offset = (addr & (VG_TT_FAST_MASK << 1)) << 3

           The mask (VG_TT_FAST_MASK << 1) is built in r5 from 16-bit
           immediates: llill sets the low halfword and clears the rest
           of the register; iilh then inserts the next halfword, and is
           only assembled when that halfword is non-zero.
        */
        larl    %r8, VG_(tt_fast)
        llill   %r5,(VG_TT_FAST_MASK << 1) & 0xffff
#if ((( VG_TT_FAST_MASK << 1) & 0xffff0000) >> 16 != 0)
        iilh    %r5,((VG_TT_FAST_MASK << 1) & 0xffff0000) >> 16
#endif
        ngr     %r5,%r2               /* r5 = addr & (VG_TT_FAST_MASK << 1) */
        sllg    %r7,%r5,3             /* r7 = r5 << 3 = byte offset of entry */
        lg      %r11, 8(%r8,%r7)      /* .host */
        cg      %r2,  0(%r8,%r7)      /* next guest address == .guest ? */
        jne     fast_lookup_failed

        /* Found a match.  Jump to .host.
           r11 is an address. There we will find the instrumented client code.
           That code may modify the guest state register r13. */
        br      %r11
        .long   0x0   /* persuade insn decoders not to speculate past here */

fast_lookup_failed:
        /* Increment VG_(stats__n_xindir_misses_32)
           (32-bit load / add / store) */
        larl    %r8, VG_(stats__n_xindir_misses_32)
        l       %r10,0(%r8)
        ahi     %r10,1
        st      %r10,0(%r8)

        /* Return to the caller: TRC in %r0, no address to patch in %r1 */
        lghi    %r0,VG_TRC_INNER_FASTMISS
        lghi    %r1,0
        j       postamble


/* ------ Assisted jump ------ */
        .global VG_(disp_cp_xassisted)
VG_(disp_cp_xassisted):
        /* guest-state-pointer contains the TRC. Put the value into the
           return register.  I.e. at this continuation point r13 is not
           used as a pointer; its value is returned as two_words[0].
           There is no address to patch, so %r1 is 0. */
        lgr     %r0,%r13
        lghi    %r1,0
        j       postamble


/* ------ Event check failed ------ */
        .global VG_(disp_cp_evcheck_fail)
VG_(disp_cp_evcheck_fail):
        /* Return to the caller with TRC VG_TRC_INNER_COUNTERZERO in %r0
           and no address to patch (%r1 = 0). */
        lghi    %r0,VG_TRC_INNER_COUNTERZERO
        lghi    %r1,0
        j       postamble


        /* The symbol size of VG_(disp_run_translations) spans everything
           from its entry label up to this point, i.e. it includes the
           postamble and all continuation points above. */
        .size VG_(disp_run_translations), .-VG_(disp_run_translations)

#endif /* VGA_s390x */

/* Let the linker know we don't need an executable stack.
   This sits outside the VGA_s390x guard, so it is emitted no matter
   which architecture the file is built for.  The macro itself is not
   defined in this file. */
MARK_STACK_NO_EXEC

/*--------------------------------------------------------------------*/
/*--- end                                   dispatch-s390x-linux.S ---*/
/*--------------------------------------------------------------------*/
