1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 2005-2016. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 
22 #include <stddef.h>	/* offsetof() */
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26 #include "global.h"
27 #include "erl_binary.h"
28 
29 #include "hipe_arch.h"
30 #include "hipe_native_bif.h"	/* nbif_callemu() */
31 #include "hipe_bif0.h"
32 
33 /* Flush dcache and invalidate icache for a range of addresses. */
void hipe_flush_icache_range(void *address, unsigned int nbytes)
{
    /*
     * Arguments for sys_cacheflush(): start address in r0, end address
     * (start + nbytes) in r1, flags (always 0) in r2.  r0 is also
     * declared as an output of the trap.
     */
    register unsigned long beg __asm__("r0") = (unsigned long)address;
    register unsigned long end __asm__("r1") = (unsigned long)address + nbytes;
    register unsigned long flg __asm__("r2") = 0;

    /*
     * The "memory" clobber makes the trap a compiler-level memory
     * barrier: stores that wrote the code being flushed must not be
     * moved past (or delayed beyond) the flush, which could otherwise
     * happen when this function is inlined into its callers.
     */
#if defined(__ARM_EABI__)
    /* EABI: the system call number is passed in r7 and the trap is "swi 0". */
    register unsigned long scno __asm__("r7") = 0xf0002;
    __asm__ __volatile__("swi 0"	/* sys_cacheflush() */
			 : "=r"(beg)
			 : "0"(beg), "r"(end), "r"(flg), "r"(scno)
			 : "memory");
#else
    /* Non-EABI: the system call number is encoded in the swi instruction. */
    __asm__ __volatile__("swi 0x9f0002"	/* sys_cacheflush() */
			 : "=r"(beg)
			 : "0"(beg), "r"(end), "r"(flg)
			 : "memory");
#endif
}
53 
/*
 * Flush the caches for the single 4-byte word located at `address`.
 * Convenience wrapper around hipe_flush_icache_range().
 */
void hipe_flush_icache_word(void *address)
{
    enum { WORD_BYTES = 4 };

    hipe_flush_icache_range(address, WORD_BYTES);
}
58 
59 
/*
 * Validate the `callees` term.
 *
 * `callees` must be a tuple whose elements are each either an atom or
 * a 3-tuple {atom, atom, small integer} where the integer is at most 255.
 *
 * Returns the arity of the `callees` tuple if every element is
 * acceptable, or -1 otherwise.
 */
static int check_callees(Eterm callees)
{
    Eterm *elems;
    Uint count;
    Uint ix;

    if (is_not_tuple(callees))
        return -1;

    elems = tuple_val(callees);
    count = arityval(elems[0]);

    for (ix = 1; ix <= count; ++ix) {
        Eterm callee = elems[ix];
        Eterm *mfa;

        /* A bare atom is accepted as is. */
        if (is_atom(callee))
            continue;

        /* Anything else has to be a well-formed 3-tuple. */
        if (is_not_tuple(callee))
            return -1;
        mfa = tuple_val(callee);
        if (mfa[0] != make_arityval(3))
            return -1;
        if (is_not_atom(mfa[1]))
            return -1;
        if (is_not_atom(mfa[2]))
            return -1;
        if (is_not_small(mfa[3]))
            return -1;
        if (unsigned_val(mfa[3]) > 255)
            return -1;
    }
    return count;
}
84 
85 #define TRAMPOLINE_WORDS 2
86 
/*
 * Emit `nrcallees` trampolines, back to back, starting at `address`,
 * and record the start of each one in `trampvec`.
 *
 * Every trampoline occupies TRAMPOLINE_WORDS words: a
 * "ldr pc, [pc,#-4]" instruction followed by a data word for the
 * callee's address, which is initialised to 0 here.
 *
 * `callees` is not used by this implementation.
 */
static void generate_trampolines(Uint32* address,
                                 int nrcallees, Eterm callees,
                                 Uint32** trampvec)
{
    Uint32 *slot;
    int n;

    for (n = 0, slot = address; n < nrcallees; ++n, slot += TRAMPOLINE_WORDS) {
        slot[0] = 0xE51FF004;   /* ldr pc, [pc,#-4] */
        slot[1] = 0;            /* callee's address */
        trampvec[n] = slot;
    }

    /* Make the freshly written instructions visible to the icache. */
    hipe_flush_icache_range(address, nrcallees*TRAMPOLINE_WORDS*sizeof(Uint32));
}
102 
/*
 * Allocate memory for `nrbytes` bytes of native code followed by one
 * trampoline per entry in `callees`.
 *
 * nrbytes:     size of the code; must be a multiple of 4.
 * callees:     tuple of callees, validated by check_callees().
 * trampolines: out parameter; receives a binary, created on process
 *              `p`, holding one trampoline address per callee.
 * p:           process on which the trampoline vector binary is created.
 *
 * Returns the start of the allocated area, or NULL if `nrbytes` is not
 * word aligned or `callees` is malformed.
 */
void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
{
    Uint insn_words;
    int ncallees;
    Eterm vecbin;
    Uint32 **vec;
    Uint32 *base;

    /* Code is a sequence of 32-bit words. */
    if ((nrbytes & 0x3) != 0)
        return NULL;
    insn_words = nrbytes / sizeof(Uint32);

    ncallees = check_callees(callees);
    if (ncallees < 0)
        return NULL;

    /* The binary is used as storage for the vector of trampoline addresses. */
    vecbin = new_binary(p, NULL, ncallees*sizeof(Uint32*));
    vec = (Uint32**)binary_bytes(vecbin);

    /* A single block: the code first, then the trampolines. */
    base = erts_alloc(ERTS_ALC_T_HIPE_EXEC,
                      (insn_words + ncallees*TRAMPOLINE_WORDS)*sizeof(Uint32));

    generate_trampolines(base + insn_words, ncallees, callees, vec);
    *trampolines = vecbin;
    return base;
}
128 
hipe_free_code(void * code,unsigned int bytes)129 void  hipe_free_code(void* code, unsigned int bytes)
130 {
131     erts_free(ERTS_ALC_T_HIPE_EXEC, code);
132 }
133 
134 /*
135  * ARMv5's support for 32-bit immediates is effectively non-existent.
136  * Hence, every 32-bit immediate is stored in memory and loaded via
137  * a PC-relative addressing mode. Relocation entries refer to those
138  * data words, NOT the load instructions, so patching is trivial.
139  */
static void patch_imm32(Uint32 *address, unsigned int imm32)
{
    /* Store the new 32-bit value into the word at `address` ... */
    address[0] = imm32;
    /* ... and flush the caches for that word. */
    hipe_flush_icache_range(address, 4);
}
145 
/*
 * Patch the 32-bit data word at `address` with `value`
 * (narrowed to unsigned int) by way of patch_imm32().
 */
void hipe_patch_load_fe(Uint32 *address, Uint value)
{
    unsigned int word = value;

    patch_imm32(address, word);
}
150 
/*
 * Patch the 32-bit word at `address` with `value`.
 *
 * `type` must be one of am_closure, am_constant, am_atom or
 * am_c_const; all of them are patched the same way.
 *
 * Returns 0 on success, or -1 (without patching) for any other type.
 */
int hipe_patch_insn(void *address, Uint32 value, Eterm type)
{
    switch (type) {
      case am_closure:
      case am_constant:
      case am_atom:
      case am_c_const:
        patch_imm32((Uint32*)address, value);
        return 0;
      default:
        return -1;
    }
}
165 
166 /* Make stub for native code calling exported beam function
167 */
hipe_make_native_stub(void * callee_exp,unsigned int beamArity)168 void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity)
169 {
170     unsigned int *code;
171     int callemu_offset;
172     int is_short_jmp;
173 
174     /*
175      * Native code calls BEAM via a stub looking as follows:
176      *
177      * mov r0, #beamArity
178      * ldr r8, [pc,#0] // callee_exp
179      * b nbif_callemu
180      * .long callee_exp
181      *
182      * or if nbif_callemu is too far away:
183      *
184      * mov r0, #beamArity
185      * ldr r8, [pc,#0] // callee_exp
186      * ldr pc, [pc,#0] // nbif_callemu
187      * .long callee_exp
188      * .long nbif_callemu
189      *
190      * I'm using r0 and r8 since they aren't used for
191      * parameter passing in native code.
192      */
193 
194     code = erts_alloc(ERTS_ALC_T_HIPE_EXEC, 5*sizeof(Uint32));
195     if (!code)
196 	return NULL;
197     callemu_offset = ((int)&nbif_callemu - ((int)&code[2] + 8)) >> 2;
198     is_short_jmp = (callemu_offset >= -0x00800000 &&
199                     callemu_offset <= 0x007FFFFF);
200 #ifdef DEBUG
201     if (is_short_jmp && (callemu_offset % 3)==0) {
202         is_short_jmp = 0;
203     }
204 #endif
205 
206     /* mov r0, #beamArity */
207     code[0] = 0xE3A00000 | (beamArity & 0xFF);
208     /* ldr r8, [pc,#0] // callee_exp */
209     code[1] = 0xE59F8000;
210     if (is_short_jmp) {
211         /* b nbif_callemu */
212         code[2] = 0xEA000000 | (callemu_offset & 0x00FFFFFF);
213     }
214     else {
215         /* ldr pc, [pc,#0] // nbif_callemu */
216         code[2] = 0xE59FF000;
217         /* .long nbif_callemu */
218         code[4] = (unsigned int)&nbif_callemu;
219     }
220     /* .long callee_exp */
221     code[3] = (unsigned int)callee_exp;
222 
223     hipe_flush_icache_range(code, 5*sizeof(Uint32));
224 
225     return code;
226 }
227 
hipe_free_native_stub(void * stub)228 void hipe_free_native_stub(void* stub)
229 {
230     erts_free(ERTS_ALC_T_HIPE_EXEC, stub);
231 }
232 
/*
 * Rewrite the instruction at `address`: its top 8 bits are kept and
 * its low 24 bits are replaced by the low 24 bits of `offset`.
 * The caches are then flushed for that word.
 *
 * `AA` is not used by this implementation.
 */
static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA)
{
    *address = (*address & 0xFF000000) | (offset & 0x00FFFFFF);
    hipe_flush_icache_word(address);
}
240 
hipe_patch_call(void * callAddress,void * destAddress,void * trampoline)241 int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
242 {
243     Sint32 destOffset = ((Sint32)destAddress - ((Sint32)callAddress+8)) >> 2;
244     if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) {
245 	/* The destination is within a [-32MB,+32MB[ range from us.
246 	   We can reach it with a b/bl instruction.
247 	   This is typical for nearby Erlang code. */
248 	patch_b((Uint32*)callAddress, destOffset, 0);
249     } else {
250 	/* The destination is too distant for b/bl.
251 	   Must do a b/bl to the trampoline. */
252 	Sint32 trampOffset = ((Sint32)trampoline - ((Sint32)callAddress+8)) >> 2;
253 	if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) {
254 	    /* Update the trampoline's address computation.
255 	       (May be redundant, but we can't tell.) */
256 	    patch_imm32((Uint32*)trampoline+1, (Uint32)destAddress);
257 	    /* Update this call site. */
258 	    patch_b((Uint32*)callAddress, trampOffset, 0);
259 	} else
260 	    return -1;
261     }
262     return 0;
263 }
264 
hipe_arch_print_pcb(struct hipe_process_state * p)265 void hipe_arch_print_pcb(struct hipe_process_state *p)
266 {
267 #define U(n,x) \
268     printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "")
269     U("nra        ", nra);
270     U("narity     ", narity);
271 #undef U
272 }
273